diff options
Diffstat (limited to 'Documentation')
43 files changed, 881 insertions, 242 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 40ac7759c3bb..d273b557a934 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX | |||
@@ -126,18 +126,16 @@ devices.txt | |||
126 | - plain ASCII listing of all the nodes in /dev/ with major minor #'s. | 126 | - plain ASCII listing of all the nodes in /dev/ with major minor #'s. |
127 | digiepca.txt | 127 | digiepca.txt |
128 | - info on Digi Intl. {PC,PCI,EISA}Xx and Xem series cards. | 128 | - info on Digi Intl. {PC,PCI,EISA}Xx and Xem series cards. |
129 | dnotify.txt | ||
130 | - info about directory notification in Linux. | ||
131 | dontdiff | 129 | dontdiff |
132 | - file containing a list of files that should never be diff'ed. | 130 | - file containing a list of files that should never be diff'ed. |
133 | driver-model/ | 131 | driver-model/ |
134 | - directory with info about Linux driver model. | 132 | - directory with info about Linux driver model. |
135 | drivers/ | ||
136 | - directory with driver documentation (currently only EDAC). | ||
137 | dvb/ | 133 | dvb/ |
138 | - info on Linux Digital Video Broadcast (DVB) subsystem. | 134 | - info on Linux Digital Video Broadcast (DVB) subsystem. |
139 | early-userspace/ | 135 | early-userspace/ |
140 | - info about initramfs, klibc, and userspace early during boot. | 136 | - info about initramfs, klibc, and userspace early during boot. |
137 | edac.txt | ||
138 | - information on EDAC - Error Detection And Correction | ||
141 | eisa.txt | 139 | eisa.txt |
142 | - info on EISA bus support. | 140 | - info on EISA bus support. |
143 | exception.txt | 141 | exception.txt |
@@ -334,20 +332,8 @@ rtc.txt | |||
334 | - notes on how to use the Real Time Clock (aka CMOS clock) driver. | 332 | - notes on how to use the Real Time Clock (aka CMOS clock) driver. |
335 | s390/ | 333 | s390/ |
336 | - directory with info on using Linux on the IBM S390. | 334 | - directory with info on using Linux on the IBM S390. |
337 | sched-arch.txt | 335 | scheduler/ |
338 | - CPU Scheduler implementation hints for architecture specific code. | 336 | - directory with info on the scheduler. |
339 | sched-coding.txt | ||
340 | - reference for various scheduler-related methods in the O(1) scheduler. | ||
341 | sched-design.txt | ||
342 | - goals, design and implementation of the Linux O(1) scheduler. | ||
343 | sched-design-CFS.txt | ||
344 | - goals, design and implementation of the Complete Fair Scheduler. | ||
345 | sched-domains.txt | ||
346 | - information on scheduling domains. | ||
347 | sched-nice-design.txt | ||
348 | - How and why the scheduler's nice levels are implemented. | ||
349 | sched-stats.txt | ||
350 | - information on schedstats (Linux Scheduler Statistics). | ||
351 | scsi/ | 337 | scsi/ |
352 | - directory with info on Linux scsi support. | 338 | - directory with info on Linux scsi support. |
353 | serial/ | 339 | serial/ |
@@ -360,8 +346,6 @@ sgi-visws.txt | |||
360 | - short blurb on the SGI Visual Workstations. | 346 | - short blurb on the SGI Visual Workstations. |
361 | sh/ | 347 | sh/ |
362 | - directory with info on porting Linux to a new architecture. | 348 | - directory with info on porting Linux to a new architecture. |
363 | sharedsubtree.txt | ||
364 | - a description of shared subtrees for namespaces. | ||
365 | smart-config.txt | 349 | smart-config.txt |
366 | - description of the Smart Config makefile feature. | 350 | - description of the Smart Config makefile feature. |
367 | sony-laptop.txt | 351 | sony-laptop.txt |
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids index 648d65dbc0e7..28f14695a852 100644 --- a/Documentation/ABI/testing/sysfs-kernel-uids +++ b/Documentation/ABI/testing/sysfs-kernel-uids | |||
@@ -11,4 +11,4 @@ Description: | |||
11 | example would be, if User A has shares = 1024 and user | 11 | example would be, if User A has shares = 1024 and user |
12 | B has shares = 2048, User B will get twice the CPU | 12 | B has shares = 2048, User B will get twice the CPU |
13 | bandwidth user A will. For more details refer to | 13 | bandwidth user A will. For more details refer to |
14 | Documentation/sched-design-CFS.txt | 14 | Documentation/scheduler/sched-design-CFS.txt |
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING index 6c816751b868..65022a87bf17 100644 --- a/Documentation/BUG-HUNTING +++ b/Documentation/BUG-HUNTING | |||
@@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled: | |||
214 | gdb vmlinux | 214 | gdb vmlinux |
215 | (gdb) p vt_ioctl | 215 | (gdb) p vt_ioctl |
216 | (gdb) l *(0x<address of vt_ioctl> + 0xda8) | 216 | (gdb) l *(0x<address of vt_ioctl> + 0xda8) |
217 | or, as one command | ||
218 | (gdb) l *(vt_ioctl + 0xda8) | ||
219 | |||
220 | If you have a call trace, such as :- | ||
221 | >Call Trace: | ||
222 | > [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5 | ||
223 | > [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e | ||
224 | > [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee | ||
225 | > ... | ||
226 | this shows the problem in the :jbd: module. You can load that module in gdb | ||
227 | and list the relevant code. | ||
228 | gdb fs/jbd/jbd.ko | ||
229 | (gdb) p log_wait_commit | ||
230 | (gdb) l *(0x<address> + 0xa3) | ||
231 | or | ||
232 | (gdb) l *(log_wait_commit + 0xa3) | ||
233 | |||
217 | 234 | ||
218 | Another very useful option of the Kernel Hacking section in menuconfig is | 235 | Another very useful option of the Kernel Hacking section in menuconfig is |
219 | Debug memory allocations. This will help you see whether data has been | 236 | Debug memory allocations. This will help you see whether data has been |
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index 4215f69ce7e6..3a882d9a90a9 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl | |||
@@ -172,7 +172,7 @@ | |||
172 | <listitem><para>Chiplevel hardware encapsulation</para></listitem> | 172 | <listitem><para>Chiplevel hardware encapsulation</para></listitem> |
173 | </orderedlist> | 173 | </orderedlist> |
174 | </para> | 174 | </para> |
175 | <sect1> | 175 | <sect1 id="Interrupt_control_flow"> |
176 | <title>Interrupt control flow</title> | 176 | <title>Interrupt control flow</title> |
177 | <para> | 177 | <para> |
178 | Each interrupt is described by an interrupt descriptor structure | 178 | Each interrupt is described by an interrupt descriptor structure |
@@ -190,7 +190,7 @@ | |||
190 | referenced by the assigned chip descriptor structure. | 190 | referenced by the assigned chip descriptor structure. |
191 | </para> | 191 | </para> |
192 | </sect1> | 192 | </sect1> |
193 | <sect1> | 193 | <sect1 id="Highlevel_Driver_API"> |
194 | <title>Highlevel Driver API</title> | 194 | <title>Highlevel Driver API</title> |
195 | <para> | 195 | <para> |
196 | The highlevel Driver API consists of the following functions: | 196 | The highlevel Driver API consists of the following functions: |
@@ -210,7 +210,7 @@ | |||
210 | See the autogenerated function documentation for details. | 210 | See the autogenerated function documentation for details. |
211 | </para> | 211 | </para> |
212 | </sect1> | 212 | </sect1> |
213 | <sect1> | 213 | <sect1 id="Highlevel_IRQ_flow_handlers"> |
214 | <title>Highlevel IRQ flow handlers</title> | 214 | <title>Highlevel IRQ flow handlers</title> |
215 | <para> | 215 | <para> |
216 | The generic layer provides a set of pre-defined irq-flow methods: | 216 | The generic layer provides a set of pre-defined irq-flow methods: |
@@ -224,9 +224,9 @@ | |||
224 | specific) are assigned to specific interrupts by the architecture | 224 | specific) are assigned to specific interrupts by the architecture |
225 | either during bootup or during device initialization. | 225 | either during bootup or during device initialization. |
226 | </para> | 226 | </para> |
227 | <sect2> | 227 | <sect2 id="Default_flow_implementations"> |
228 | <title>Default flow implementations</title> | 228 | <title>Default flow implementations</title> |
229 | <sect3> | 229 | <sect3 id="Helper_functions"> |
230 | <title>Helper functions</title> | 230 | <title>Helper functions</title> |
231 | <para> | 231 | <para> |
232 | The helper functions call the chip primitives and | 232 | The helper functions call the chip primitives and |
@@ -267,9 +267,9 @@ noop(irq) | |||
267 | </para> | 267 | </para> |
268 | </sect3> | 268 | </sect3> |
269 | </sect2> | 269 | </sect2> |
270 | <sect2> | 270 | <sect2 id="Default_flow_handler_implementations"> |
271 | <title>Default flow handler implementations</title> | 271 | <title>Default flow handler implementations</title> |
272 | <sect3> | 272 | <sect3 id="Default_Level_IRQ_flow_handler"> |
273 | <title>Default Level IRQ flow handler</title> | 273 | <title>Default Level IRQ flow handler</title> |
274 | <para> | 274 | <para> |
275 | handle_level_irq provides a generic implementation | 275 | handle_level_irq provides a generic implementation |
@@ -284,7 +284,7 @@ desc->chip->end(); | |||
284 | </programlisting> | 284 | </programlisting> |
285 | </para> | 285 | </para> |
286 | </sect3> | 286 | </sect3> |
287 | <sect3> | 287 | <sect3 id="Default_Edge_IRQ_flow_handler"> |
288 | <title>Default Edge IRQ flow handler</title> | 288 | <title>Default Edge IRQ flow handler</title> |
289 | <para> | 289 | <para> |
290 | handle_edge_irq provides a generic implementation | 290 | handle_edge_irq provides a generic implementation |
@@ -311,7 +311,7 @@ desc->chip->end(); | |||
311 | </programlisting> | 311 | </programlisting> |
312 | </para> | 312 | </para> |
313 | </sect3> | 313 | </sect3> |
314 | <sect3> | 314 | <sect3 id="Default_simple_IRQ_flow_handler"> |
315 | <title>Default simple IRQ flow handler</title> | 315 | <title>Default simple IRQ flow handler</title> |
316 | <para> | 316 | <para> |
317 | handle_simple_irq provides a generic implementation | 317 | handle_simple_irq provides a generic implementation |
@@ -328,7 +328,7 @@ handle_IRQ_event(desc->action); | |||
328 | </programlisting> | 328 | </programlisting> |
329 | </para> | 329 | </para> |
330 | </sect3> | 330 | </sect3> |
331 | <sect3> | 331 | <sect3 id="Default_per_CPU_flow_handler"> |
332 | <title>Default per CPU flow handler</title> | 332 | <title>Default per CPU flow handler</title> |
333 | <para> | 333 | <para> |
334 | handle_percpu_irq provides a generic implementation | 334 | handle_percpu_irq provides a generic implementation |
@@ -349,7 +349,7 @@ desc->chip->end(); | |||
349 | </para> | 349 | </para> |
350 | </sect3> | 350 | </sect3> |
351 | </sect2> | 351 | </sect2> |
352 | <sect2> | 352 | <sect2 id="Quirks_and_optimizations"> |
353 | <title>Quirks and optimizations</title> | 353 | <title>Quirks and optimizations</title> |
354 | <para> | 354 | <para> |
355 | The generic functions are intended for 'clean' architectures and chips, | 355 | The generic functions are intended for 'clean' architectures and chips, |
@@ -358,7 +358,7 @@ desc->chip->end(); | |||
358 | overriding the highlevel irq-flow handler. | 358 | overriding the highlevel irq-flow handler. |
359 | </para> | 359 | </para> |
360 | </sect2> | 360 | </sect2> |
361 | <sect2> | 361 | <sect2 id="Delayed_interrupt_disable"> |
362 | <title>Delayed interrupt disable</title> | 362 | <title>Delayed interrupt disable</title> |
363 | <para> | 363 | <para> |
364 | This per interrupt selectable feature, which was introduced by Russell | 364 | This per interrupt selectable feature, which was introduced by Russell |
@@ -380,7 +380,7 @@ desc->chip->end(); | |||
380 | </para> | 380 | </para> |
381 | </sect2> | 381 | </sect2> |
382 | </sect1> | 382 | </sect1> |
383 | <sect1> | 383 | <sect1 id="Chiplevel_hardware_encapsulation"> |
384 | <title>Chiplevel hardware encapsulation</title> | 384 | <title>Chiplevel hardware encapsulation</title> |
385 | <para> | 385 | <para> |
386 | The chip level hardware descriptor structure irq_chip | 386 | The chip level hardware descriptor structure irq_chip |
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 77436d735013..059aaf20951a 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -165,6 +165,7 @@ X!Ilib/string.c | |||
165 | !Emm/vmalloc.c | 165 | !Emm/vmalloc.c |
166 | !Imm/page_alloc.c | 166 | !Imm/page_alloc.c |
167 | !Emm/mempool.c | 167 | !Emm/mempool.c |
168 | !Emm/dmapool.c | ||
168 | !Emm/page-writeback.c | 169 | !Emm/page-writeback.c |
169 | !Emm/truncate.c | 170 | !Emm/truncate.c |
170 | </sect1> | 171 | </sect1> |
@@ -371,7 +372,6 @@ X!Iinclude/linux/device.h | |||
371 | !Edrivers/base/class.c | 372 | !Edrivers/base/class.c |
372 | !Edrivers/base/firmware_class.c | 373 | !Edrivers/base/firmware_class.c |
373 | !Edrivers/base/transport_class.c | 374 | !Edrivers/base/transport_class.c |
374 | !Edrivers/base/dmapool.c | ||
375 | <!-- Cannot be included, because | 375 | <!-- Cannot be included, because |
376 | attribute_container_add_class_device_adapter | 376 | attribute_container_add_class_device_adapter |
377 | and attribute_container_classdev_to_container | 377 | and attribute_container_classdev_to_container |
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 01825ee7db64..2e9d6b41f034 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
@@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one. | |||
717 | <para> | 717 | <para> |
718 | For our first example, we assume that all operations are in user | 718 | For our first example, we assume that all operations are in user |
719 | context (ie. from system calls), so we can sleep. This means we can | 719 | context (ie. from system calls), so we can sleep. This means we can |
720 | use a semaphore to protect the cache and all the objects within | 720 | use a mutex to protect the cache and all the objects within |
721 | it. Here's the code: | 721 | it. Here's the code: |
722 | </para> | 722 | </para> |
723 | 723 | ||
@@ -725,7 +725,7 @@ it. Here's the code: | |||
725 | #include <linux/list.h> | 725 | #include <linux/list.h> |
726 | #include <linux/slab.h> | 726 | #include <linux/slab.h> |
727 | #include <linux/string.h> | 727 | #include <linux/string.h> |
728 | #include <asm/semaphore.h> | 728 | #include <linux/mutex.h> |
729 | #include <asm/errno.h> | 729 | #include <asm/errno.h> |
730 | 730 | ||
731 | struct object | 731 | struct object |
@@ -737,7 +737,7 @@ struct object | |||
737 | }; | 737 | }; |
738 | 738 | ||
739 | /* Protects the cache, cache_num, and the objects within it */ | 739 | /* Protects the cache, cache_num, and the objects within it */ |
740 | static DECLARE_MUTEX(cache_lock); | 740 | static DEFINE_MUTEX(cache_lock); |
741 | static LIST_HEAD(cache); | 741 | static LIST_HEAD(cache); |
742 | static unsigned int cache_num = 0; | 742 | static unsigned int cache_num = 0; |
743 | #define MAX_CACHE_SIZE 10 | 743 | #define MAX_CACHE_SIZE 10 |
@@ -789,17 +789,17 @@ int cache_add(int id, const char *name) | |||
789 | obj->id = id; | 789 | obj->id = id; |
790 | obj->popularity = 0; | 790 | obj->popularity = 0; |
791 | 791 | ||
792 | down(&cache_lock); | 792 | mutex_lock(&cache_lock); |
793 | __cache_add(obj); | 793 | __cache_add(obj); |
794 | up(&cache_lock); | 794 | mutex_unlock(&cache_lock); |
795 | return 0; | 795 | return 0; |
796 | } | 796 | } |
797 | 797 | ||
798 | void cache_delete(int id) | 798 | void cache_delete(int id) |
799 | { | 799 | { |
800 | down(&cache_lock); | 800 | mutex_lock(&cache_lock); |
801 | __cache_delete(__cache_find(id)); | 801 | __cache_delete(__cache_find(id)); |
802 | up(&cache_lock); | 802 | mutex_unlock(&cache_lock); |
803 | } | 803 | } |
804 | 804 | ||
805 | int cache_find(int id, char *name) | 805 | int cache_find(int id, char *name) |
@@ -807,13 +807,13 @@ int cache_find(int id, char *name) | |||
807 | struct object *obj; | 807 | struct object *obj; |
808 | int ret = -ENOENT; | 808 | int ret = -ENOENT; |
809 | 809 | ||
810 | down(&cache_lock); | 810 | mutex_lock(&cache_lock); |
811 | obj = __cache_find(id); | 811 | obj = __cache_find(id); |
812 | if (obj) { | 812 | if (obj) { |
813 | ret = 0; | 813 | ret = 0; |
814 | strcpy(name, obj->name); | 814 | strcpy(name, obj->name); |
815 | } | 815 | } |
816 | up(&cache_lock); | 816 | mutex_unlock(&cache_lock); |
817 | return ret; | 817 | return ret; |
818 | } | 818 | } |
819 | </programlisting> | 819 | </programlisting> |
@@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the | |||
853 | int popularity; | 853 | int popularity; |
854 | }; | 854 | }; |
855 | 855 | ||
856 | -static DECLARE_MUTEX(cache_lock); | 856 | -static DEFINE_MUTEX(cache_lock); |
857 | +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; | 857 | +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; |
858 | static LIST_HEAD(cache); | 858 | static LIST_HEAD(cache); |
859 | static unsigned int cache_num = 0; | 859 | static unsigned int cache_num = 0; |
@@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the | |||
870 | obj->id = id; | 870 | obj->id = id; |
871 | obj->popularity = 0; | 871 | obj->popularity = 0; |
872 | 872 | ||
873 | - down(&cache_lock); | 873 | - mutex_lock(&cache_lock); |
874 | + spin_lock_irqsave(&cache_lock, flags); | 874 | + spin_lock_irqsave(&cache_lock, flags); |
875 | __cache_add(obj); | 875 | __cache_add(obj); |
876 | - up(&cache_lock); | 876 | - mutex_unlock(&cache_lock); |
877 | + spin_unlock_irqrestore(&cache_lock, flags); | 877 | + spin_unlock_irqrestore(&cache_lock, flags); |
878 | return 0; | 878 | return 0; |
879 | } | 879 | } |
880 | 880 | ||
881 | void cache_delete(int id) | 881 | void cache_delete(int id) |
882 | { | 882 | { |
883 | - down(&cache_lock); | 883 | - mutex_lock(&cache_lock); |
884 | + unsigned long flags; | 884 | + unsigned long flags; |
885 | + | 885 | + |
886 | + spin_lock_irqsave(&cache_lock, flags); | 886 | + spin_lock_irqsave(&cache_lock, flags); |
887 | __cache_delete(__cache_find(id)); | 887 | __cache_delete(__cache_find(id)); |
888 | - up(&cache_lock); | 888 | - mutex_unlock(&cache_lock); |
889 | + spin_unlock_irqrestore(&cache_lock, flags); | 889 | + spin_unlock_irqrestore(&cache_lock, flags); |
890 | } | 890 | } |
891 | 891 | ||
@@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the | |||
895 | int ret = -ENOENT; | 895 | int ret = -ENOENT; |
896 | + unsigned long flags; | 896 | + unsigned long flags; |
897 | 897 | ||
898 | - down(&cache_lock); | 898 | - mutex_lock(&cache_lock); |
899 | + spin_lock_irqsave(&cache_lock, flags); | 899 | + spin_lock_irqsave(&cache_lock, flags); |
900 | obj = __cache_find(id); | 900 | obj = __cache_find(id); |
901 | if (obj) { | 901 | if (obj) { |
902 | ret = 0; | 902 | ret = 0; |
903 | strcpy(name, obj->name); | 903 | strcpy(name, obj->name); |
904 | } | 904 | } |
905 | - up(&cache_lock); | 905 | - mutex_unlock(&cache_lock); |
906 | + spin_unlock_irqrestore(&cache_lock, flags); | 906 | + spin_unlock_irqrestore(&cache_lock, flags); |
907 | return ret; | 907 | return ret; |
908 | } | 908 | } |
diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl index f63822195871..fe7664ce9667 100644 --- a/Documentation/DocBook/lsm.tmpl +++ b/Documentation/DocBook/lsm.tmpl | |||
@@ -33,7 +33,7 @@ | |||
33 | </authorgroup> | 33 | </authorgroup> |
34 | </articleinfo> | 34 | </articleinfo> |
35 | 35 | ||
36 | <sect1><title>Introduction</title> | 36 | <sect1 id="Introduction"><title>Introduction</title> |
37 | 37 | ||
38 | <para> | 38 | <para> |
39 | In March 2001, the National Security Agency (NSA) gave a presentation | 39 | In March 2001, the National Security Agency (NSA) gave a presentation |
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 957cf5c26831..8e145857fc9d 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl | |||
@@ -80,7 +80,7 @@ | |||
80 | struct member has a short description which is marked with an [XXX] identifier. | 80 | struct member has a short description which is marked with an [XXX] identifier. |
81 | The following chapters explain the meaning of those identifiers. | 81 | The following chapters explain the meaning of those identifiers. |
82 | </para> | 82 | </para> |
83 | <sect1> | 83 | <sect1 id="Function_identifiers_XXX"> |
84 | <title>Function identifiers [XXX]</title> | 84 | <title>Function identifiers [XXX]</title> |
85 | <para> | 85 | <para> |
86 | The functions are marked with [XXX] identifiers in the short | 86 | The functions are marked with [XXX] identifiers in the short |
@@ -115,7 +115,7 @@ | |||
115 | </para></listitem> | 115 | </para></listitem> |
116 | </itemizedlist> | 116 | </itemizedlist> |
117 | </sect1> | 117 | </sect1> |
118 | <sect1> | 118 | <sect1 id="Struct_member_identifiers_XXX"> |
119 | <title>Struct member identifiers [XXX]</title> | 119 | <title>Struct member identifiers [XXX]</title> |
120 | <para> | 120 | <para> |
121 | The struct members are marked with [XXX] identifiers in the | 121 | The struct members are marked with [XXX] identifiers in the |
@@ -159,7 +159,7 @@ | |||
159 | basic functions and fill out some really board dependent | 159 | basic functions and fill out some really board dependent |
160 | members in the nand chip description structure. | 160 | members in the nand chip description structure. |
161 | </para> | 161 | </para> |
162 | <sect1> | 162 | <sect1 id="Basic_defines"> |
163 | <title>Basic defines</title> | 163 | <title>Basic defines</title> |
164 | <para> | 164 | <para> |
165 | At least you have to provide a mtd structure and | 165 | At least you have to provide a mtd structure and |
@@ -185,7 +185,7 @@ static struct nand_chip board_chip; | |||
185 | static unsigned long baseaddr; | 185 | static unsigned long baseaddr; |
186 | </programlisting> | 186 | </programlisting> |
187 | </sect1> | 187 | </sect1> |
188 | <sect1> | 188 | <sect1 id="Partition_defines"> |
189 | <title>Partition defines</title> | 189 | <title>Partition defines</title> |
190 | <para> | 190 | <para> |
191 | If you want to divide your device into partitions, then | 191 | If you want to divide your device into partitions, then |
@@ -204,7 +204,7 @@ static struct mtd_partition partition_info[] = { | |||
204 | }; | 204 | }; |
205 | </programlisting> | 205 | </programlisting> |
206 | </sect1> | 206 | </sect1> |
207 | <sect1> | 207 | <sect1 id="Hardware_control_functions"> |
208 | <title>Hardware control function</title> | 208 | <title>Hardware control function</title> |
209 | <para> | 209 | <para> |
210 | The hardware control function provides access to the | 210 | The hardware control function provides access to the |
@@ -246,7 +246,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) | |||
246 | } | 246 | } |
247 | </programlisting> | 247 | </programlisting> |
248 | </sect1> | 248 | </sect1> |
249 | <sect1> | 249 | <sect1 id="Device_ready_function"> |
250 | <title>Device ready function</title> | 250 | <title>Device ready function</title> |
251 | <para> | 251 | <para> |
252 | If the hardware interface has the ready busy pin of the NAND chip connected to a | 252 | If the hardware interface has the ready busy pin of the NAND chip connected to a |
@@ -257,7 +257,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) | |||
257 | the function must not be defined and the function pointer this->dev_ready is set to NULL. | 257 | the function must not be defined and the function pointer this->dev_ready is set to NULL. |
258 | </para> | 258 | </para> |
259 | </sect1> | 259 | </sect1> |
260 | <sect1> | 260 | <sect1 id="Init_function"> |
261 | <title>Init function</title> | 261 | <title>Init function</title> |
262 | <para> | 262 | <para> |
263 | The init function allocates memory and sets up all the board | 263 | The init function allocates memory and sets up all the board |
@@ -325,7 +325,7 @@ out: | |||
325 | module_init(board_init); | 325 | module_init(board_init); |
326 | </programlisting> | 326 | </programlisting> |
327 | </sect1> | 327 | </sect1> |
328 | <sect1> | 328 | <sect1 id="Exit_function"> |
329 | <title>Exit function</title> | 329 | <title>Exit function</title> |
330 | <para> | 330 | <para> |
331 | The exit function is only necessary if the driver is | 331 | The exit function is only necessary if the driver is |
@@ -359,7 +359,7 @@ module_exit(board_cleanup); | |||
359 | driver. For a list of functions which can be overridden by the board | 359 | driver. For a list of functions which can be overridden by the board |
360 | driver see the documentation of the nand_chip structure. | 360 | driver see the documentation of the nand_chip structure. |
361 | </para> | 361 | </para> |
362 | <sect1> | 362 | <sect1 id="Multiple_chip_control"> |
363 | <title>Multiple chip control</title> | 363 | <title>Multiple chip control</title> |
364 | <para> | 364 | <para> |
365 | The nand driver can control chip arrays. Therefore the | 365 | The nand driver can control chip arrays. Therefore the |
@@ -419,9 +419,9 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
419 | } | 419 | } |
420 | </programlisting> | 420 | </programlisting> |
421 | </sect1> | 421 | </sect1> |
422 | <sect1> | 422 | <sect1 id="Hardware_ECC_support"> |
423 | <title>Hardware ECC support</title> | 423 | <title>Hardware ECC support</title> |
424 | <sect2> | 424 | <sect2 id="Functions_and_constants"> |
425 | <title>Functions and constants</title> | 425 | <title>Functions and constants</title> |
426 | <para> | 426 | <para> |
427 | The nand driver supports three different types of | 427 | The nand driver supports three different types of |
@@ -475,7 +475,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
475 | </itemizedlist> | 475 | </itemizedlist> |
476 | </para> | 476 | </para> |
477 | </sect2> | 477 | </sect2> |
478 | <sect2> | 478 | <sect2 id="Hardware_ECC_with_syndrome_calculation"> |
479 | <title>Hardware ECC with syndrome calculation</title> | 479 | <title>Hardware ECC with syndrome calculation</title> |
480 | <para> | 480 | <para> |
481 | Many hardware ECC implementations provide Reed-Solomon | 481 | Many hardware ECC implementations provide Reed-Solomon |
@@ -500,7 +500,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
500 | </para> | 500 | </para> |
501 | </sect2> | 501 | </sect2> |
502 | </sect1> | 502 | </sect1> |
503 | <sect1> | 503 | <sect1 id="Bad_Block_table_support"> |
504 | <title>Bad block table support</title> | 504 | <title>Bad block table support</title> |
505 | <para> | 505 | <para> |
506 | Most NAND chips mark the bad blocks at a defined | 506 | Most NAND chips mark the bad blocks at a defined |
@@ -552,7 +552,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
552 | allows faster access than always checking the | 552 | allows faster access than always checking the |
553 | bad block information on the flash chip itself. | 553 | bad block information on the flash chip itself. |
554 | </para> | 554 | </para> |
555 | <sect2> | 555 | <sect2 id="Flash_based_tables"> |
556 | <title>Flash based tables</title> | 556 | <title>Flash based tables</title> |
557 | <para> | 557 | <para> |
558 | It may be desired or necessary to keep a bad block table in FLASH. | 558 | It may be desired or necessary to keep a bad block table in FLASH. |
@@ -587,7 +587,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
587 | </itemizedlist> | 587 | </itemizedlist> |
588 | </para> | 588 | </para> |
589 | </sect2> | 589 | </sect2> |
590 | <sect2> | 590 | <sect2 id="User_defined_tables"> |
591 | <title>User defined tables</title> | 591 | <title>User defined tables</title> |
592 | <para> | 592 | <para> |
593 | User defined tables are created by filling out a | 593 | User defined tables are created by filling out a |
@@ -676,7 +676,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
676 | </para> | 676 | </para> |
677 | </sect2> | 677 | </sect2> |
678 | </sect1> | 678 | </sect1> |
679 | <sect1> | 679 | <sect1 id="Spare_area_placement"> |
680 | <title>Spare area (auto)placement</title> | 680 | <title>Spare area (auto)placement</title> |
681 | <para> | 681 | <para> |
682 | The nand driver implements different possibilities for | 682 | The nand driver implements different possibilities for |
@@ -730,7 +730,7 @@ struct nand_oobinfo { | |||
730 | </para></listitem> | 730 | </para></listitem> |
731 | </itemizedlist> | 731 | </itemizedlist> |
732 | </para> | 732 | </para> |
733 | <sect2> | 733 | <sect2 id="Placement_defined_by_fs_driver"> |
734 | <title>Placement defined by fs driver</title> | 734 | <title>Placement defined by fs driver</title> |
735 | <para> | 735 | <para> |
736 | The calling function provides a pointer to a nand_oobinfo | 736 | The calling function provides a pointer to a nand_oobinfo |
@@ -760,7 +760,7 @@ struct nand_oobinfo { | |||
760 | done according to the given scheme in the nand_oobinfo structure. | 760 | done according to the given scheme in the nand_oobinfo structure. |
761 | </para> | 761 | </para> |
762 | </sect2> | 762 | </sect2> |
763 | <sect2> | 763 | <sect2 id="Automatic_placement"> |
764 | <title>Automatic placement</title> | 764 | <title>Automatic placement</title> |
765 | <para> | 765 | <para> |
766 | Automatic placement uses the built in defaults to place the | 766 | Automatic placement uses the built in defaults to place the |
@@ -774,7 +774,7 @@ struct nand_oobinfo { | |||
774 | done according to the default builtin scheme. | 774 | done according to the default builtin scheme. |
775 | </para> | 775 | </para> |
776 | </sect2> | 776 | </sect2> |
777 | <sect2> | 777 | <sect2 id="User_space_placement_selection"> |
778 | <title>User space placement selection</title> | 778 | <title>User space placement selection</title> |
779 | <para> | 779 | <para> |
780 | All non ecc functions like mtd->read and mtd->write use an internal | 780 | All non ecc functions like mtd->read and mtd->write use an internal |
@@ -789,9 +789,9 @@ struct nand_oobinfo { | |||
789 | </para> | 789 | </para> |
790 | </sect2> | 790 | </sect2> |
791 | </sect1> | 791 | </sect1> |
792 | <sect1> | 792 | <sect1 id="Spare_area_autoplacement_default"> |
793 | <title>Spare area autoplacement default schemes</title> | 793 | <title>Spare area autoplacement default schemes</title> |
794 | <sect2> | 794 | <sect2 id="pagesize_256"> |
795 | <title>256 byte pagesize</title> | 795 | <title>256 byte pagesize</title> |
796 | <informaltable><tgroup cols="3"><tbody> | 796 | <informaltable><tgroup cols="3"><tbody> |
797 | <row> | 797 | <row> |
@@ -843,7 +843,7 @@ pages this byte is reserved</entry> | |||
843 | </row> | 843 | </row> |
844 | </tbody></tgroup></informaltable> | 844 | </tbody></tgroup></informaltable> |
845 | </sect2> | 845 | </sect2> |
846 | <sect2> | 846 | <sect2 id="pagesize_512"> |
847 | <title>512 byte pagesize</title> | 847 | <title>512 byte pagesize</title> |
848 | <informaltable><tgroup cols="3"><tbody> | 848 | <informaltable><tgroup cols="3"><tbody> |
849 | <row> | 849 | <row> |
@@ -906,7 +906,7 @@ in this page</entry> | |||
906 | </row> | 906 | </row> |
907 | </tbody></tgroup></informaltable> | 907 | </tbody></tgroup></informaltable> |
908 | </sect2> | 908 | </sect2> |
909 | <sect2> | 909 | <sect2 id="pagesize_2048"> |
910 | <title>2048 byte pagesize</title> | 910 | <title>2048 byte pagesize</title> |
911 | <informaltable><tgroup cols="3"><tbody> | 911 | <informaltable><tgroup cols="3"><tbody> |
912 | <row> | 912 | <row> |
@@ -1126,9 +1126,9 @@ in this page</entry> | |||
1126 | <para> | 1126 | <para> |
1127 | This chapter describes the constants which might be relevant for a driver developer. | 1127 | This chapter describes the constants which might be relevant for a driver developer. |
1128 | </para> | 1128 | </para> |
1129 | <sect1> | 1129 | <sect1 id="Chip_option_constants"> |
1130 | <title>Chip option constants</title> | 1130 | <title>Chip option constants</title> |
1131 | <sect2> | 1131 | <sect2 id="Constants_for_chip_id_table"> |
1132 | <title>Constants for chip id table</title> | 1132 | <title>Constants for chip id table</title> |
1133 | <para> | 1133 | <para> |
1134 | These constants are defined in nand.h. They are ored together to describe | 1134 | These constants are defined in nand.h. They are ored together to describe |
@@ -1153,7 +1153,7 @@ in this page</entry> | |||
1153 | </programlisting> | 1153 | </programlisting> |
1154 | </para> | 1154 | </para> |
1155 | </sect2> | 1155 | </sect2> |
1156 | <sect2> | 1156 | <sect2 id="Constants_for_runtime_options"> |
1157 | <title>Constants for runtime options</title> | 1157 | <title>Constants for runtime options</title> |
1158 | <para> | 1158 | <para> |
1159 | These constants are defined in nand.h. They are ored together to describe | 1159 | These constants are defined in nand.h. They are ored together to describe |
@@ -1171,7 +1171,7 @@ in this page</entry> | |||
1171 | </sect2> | 1171 | </sect2> |
1172 | </sect1> | 1172 | </sect1> |
1173 | 1173 | ||
1174 | <sect1> | 1174 | <sect1 id="ECC_selection_constants"> |
1175 | <title>ECC selection constants</title> | 1175 | <title>ECC selection constants</title> |
1176 | <para> | 1176 | <para> |
1177 | Use these constants to select the ECC algorithm. | 1177 | Use these constants to select the ECC algorithm. |
@@ -1192,7 +1192,7 @@ in this page</entry> | |||
1192 | </para> | 1192 | </para> |
1193 | </sect1> | 1193 | </sect1> |
1194 | 1194 | ||
1195 | <sect1> | 1195 | <sect1 id="Hardware_control_related_constants"> |
1196 | <title>Hardware control related constants</title> | 1196 | <title>Hardware control related constants</title> |
1197 | <para> | 1197 | <para> |
1198 | These constants describe the requested hardware access function when | 1198 | These constants describe the requested hardware access function when |
@@ -1218,7 +1218,7 @@ in this page</entry> | |||
1218 | </para> | 1218 | </para> |
1219 | </sect1> | 1219 | </sect1> |
1220 | 1220 | ||
1221 | <sect1> | 1221 | <sect1 id="Bad_block_table_constants"> |
1222 | <title>Bad block table related constants</title> | 1222 | <title>Bad block table related constants</title> |
1223 | <para> | 1223 | <para> |
1224 | These constants describe the options used for bad block | 1224 | These constants describe the options used for bad block |
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl index 2de84dc195a8..1fd6a1ec7591 100644 --- a/Documentation/DocBook/procfs-guide.tmpl +++ b/Documentation/DocBook/procfs-guide.tmpl | |||
@@ -85,7 +85,7 @@ | |||
85 | 85 | ||
86 | 86 | ||
87 | 87 | ||
88 | <preface> | 88 | <preface id="Preface"> |
89 | <title>Preface</title> | 89 | <title>Preface</title> |
90 | 90 | ||
91 | <para> | 91 | <para> |
@@ -230,7 +230,7 @@ | |||
230 | 230 | ||
231 | 231 | ||
232 | 232 | ||
233 | <sect1> | 233 | <sect1 id="Creating_a_symlink"> |
234 | <title>Creating a symlink</title> | 234 | <title>Creating a symlink</title> |
235 | 235 | ||
236 | <funcsynopsis> | 236 | <funcsynopsis> |
@@ -254,7 +254,7 @@ | |||
254 | </para> | 254 | </para> |
255 | </sect1> | 255 | </sect1> |
256 | 256 | ||
257 | <sect1> | 257 | <sect1 id="Creating_a_directory"> |
258 | <title>Creating a directory</title> | 258 | <title>Creating a directory</title> |
259 | 259 | ||
260 | <funcsynopsis> | 260 | <funcsynopsis> |
@@ -274,7 +274,7 @@ | |||
274 | 274 | ||
275 | 275 | ||
276 | 276 | ||
277 | <sect1> | 277 | <sect1 id="Removing_an_entry"> |
278 | <title>Removing an entry</title> | 278 | <title>Removing an entry</title> |
279 | 279 | ||
280 | <funcsynopsis> | 280 | <funcsynopsis> |
@@ -340,7 +340,7 @@ entry->write_proc = write_proc_foo; | |||
340 | 340 | ||
341 | 341 | ||
342 | 342 | ||
343 | <sect1> | 343 | <sect1 id="Reading_data"> |
344 | <title>Reading data</title> | 344 | <title>Reading data</title> |
345 | 345 | ||
346 | <para> | 346 | <para> |
@@ -448,7 +448,7 @@ entry->write_proc = write_proc_foo; | |||
448 | 448 | ||
449 | 449 | ||
450 | 450 | ||
451 | <sect1> | 451 | <sect1 id="Writing_data"> |
452 | <title>Writing data</title> | 452 | <title>Writing data</title> |
453 | 453 | ||
454 | <para> | 454 | <para> |
@@ -579,7 +579,7 @@ int foo_read_func(char *page, char **start, off_t off, | |||
579 | 579 | ||
580 | 580 | ||
581 | 581 | ||
582 | <sect1> | 582 | <sect1 id="Modules"> |
583 | <title>Modules</title> | 583 | <title>Modules</title> |
584 | 584 | ||
585 | <para> | 585 | <para> |
@@ -599,7 +599,7 @@ entry->owner = THIS_MODULE; | |||
599 | 599 | ||
600 | 600 | ||
601 | 601 | ||
602 | <sect1> | 602 | <sect1 id="Mode_and_ownership"> |
603 | <title>Mode and ownership</title> | 603 | <title>Mode and ownership</title> |
604 | 604 | ||
605 | <para> | 605 | <para> |
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl index a8b88c47e809..b9e143e28c64 100644 --- a/Documentation/DocBook/rapidio.tmpl +++ b/Documentation/DocBook/rapidio.tmpl | |||
@@ -77,11 +77,11 @@ | |||
77 | <chapter id="bugs"> | 77 | <chapter id="bugs"> |
78 | <title>Known Bugs and Limitations</title> | 78 | <title>Known Bugs and Limitations</title> |
79 | 79 | ||
80 | <sect1> | 80 | <sect1 id="known_bugs"> |
81 | <title>Bugs</title> | 81 | <title>Bugs</title> |
82 | <para>None. ;)</para> | 82 | <para>None. ;)</para> |
83 | </sect1> | 83 | </sect1> |
84 | <sect1> | 84 | <sect1 id="Limitations"> |
85 | <title>Limitations</title> | 85 | <title>Limitations</title> |
86 | <para> | 86 | <para> |
87 | <orderedlist> | 87 | <orderedlist> |
@@ -100,7 +100,7 @@ | |||
100 | on devices, request/map memory region resources, | 100 | on devices, request/map memory region resources, |
101 | and manage mailboxes/doorbells. | 101 | and manage mailboxes/doorbells. |
102 | </para> | 102 | </para> |
103 | <sect1> | 103 | <sect1 id="Functions"> |
104 | <title>Functions</title> | 104 | <title>Functions</title> |
105 | !Iinclude/linux/rio_drv.h | 105 | !Iinclude/linux/rio_drv.h |
106 | !Edrivers/rapidio/rio-driver.c | 106 | !Edrivers/rapidio/rio-driver.c |
@@ -116,23 +116,23 @@ | |||
116 | subsystem. | 116 | subsystem. |
117 | </para> | 117 | </para> |
118 | 118 | ||
119 | <sect1><title>Structures</title> | 119 | <sect1 id="Structures"><title>Structures</title> |
120 | !Iinclude/linux/rio.h | 120 | !Iinclude/linux/rio.h |
121 | </sect1> | 121 | </sect1> |
122 | <sect1><title>Enumeration and Discovery</title> | 122 | <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title> |
123 | !Idrivers/rapidio/rio-scan.c | 123 | !Idrivers/rapidio/rio-scan.c |
124 | </sect1> | 124 | </sect1> |
125 | <sect1><title>Driver functionality</title> | 125 | <sect1 id="Driver_functionality"><title>Driver functionality</title> |
126 | !Idrivers/rapidio/rio.c | 126 | !Idrivers/rapidio/rio.c |
127 | !Idrivers/rapidio/rio-access.c | 127 | !Idrivers/rapidio/rio-access.c |
128 | </sect1> | 128 | </sect1> |
129 | <sect1><title>Device model support</title> | 129 | <sect1 id="Device_model_support"><title>Device model support</title> |
130 | !Idrivers/rapidio/rio-driver.c | 130 | !Idrivers/rapidio/rio-driver.c |
131 | </sect1> | 131 | </sect1> |
132 | <sect1><title>Sysfs support</title> | 132 | <sect1 id="Sysfs_support"><title>Sysfs support</title> |
133 | !Idrivers/rapidio/rio-sysfs.c | 133 | !Idrivers/rapidio/rio-sysfs.c |
134 | </sect1> | 134 | </sect1> |
135 | <sect1><title>PPC32 support</title> | 135 | <sect1 id="PPC32_support"><title>PPC32 support</title> |
136 | !Iarch/powerpc/kernel/rio.c | 136 | !Iarch/powerpc/kernel/rio.c |
137 | !Earch/powerpc/sysdev/fsl_rio.c | 137 | !Earch/powerpc/sysdev/fsl_rio.c |
138 | !Iarch/powerpc/sysdev/fsl_rio.c | 138 | !Iarch/powerpc/sysdev/fsl_rio.c |
diff --git a/Documentation/DocBook/videobook.tmpl b/Documentation/DocBook/videobook.tmpl index b3d93ee27693..89817795e668 100644 --- a/Documentation/DocBook/videobook.tmpl +++ b/Documentation/DocBook/videobook.tmpl | |||
@@ -170,7 +170,7 @@ int __init myradio_init(struct video_init *v) | |||
170 | <para> | 170 | <para> |
171 | The types available are | 171 | The types available are |
172 | </para> | 172 | </para> |
173 | <table frame="all"><title>Device Types</title> | 173 | <table frame="all" id="Device_Types"><title>Device Types</title> |
174 | <tgroup cols="3" align="left"> | 174 | <tgroup cols="3" align="left"> |
175 | <tbody> | 175 | <tbody> |
176 | <row> | 176 | <row> |
@@ -291,7 +291,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
291 | allows the applications to find out what sort of a card they have found and | 291 | allows the applications to find out what sort of a card they have found and |
292 | to figure out what they want to do about it. The fields in the structure are | 292 | to figure out what they want to do about it. The fields in the structure are |
293 | </para> | 293 | </para> |
294 | <table frame="all"><title>struct video_capability fields</title> | 294 | <table frame="all" id="video_capability_fields"><title>struct video_capability fields</title> |
295 | <tgroup cols="2" align="left"> | 295 | <tgroup cols="2" align="left"> |
296 | <tbody> | 296 | <tbody> |
297 | <row> | 297 | <row> |
@@ -365,7 +365,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
365 | <para> | 365 | <para> |
366 | The video_tuner structure has the following fields | 366 | The video_tuner structure has the following fields |
367 | </para> | 367 | </para> |
368 | <table frame="all"><title>struct video_tuner fields</title> | 368 | <table frame="all" id="video_tuner_fields"><title>struct video_tuner fields</title> |
369 | <tgroup cols="2" align="left"> | 369 | <tgroup cols="2" align="left"> |
370 | <tbody> | 370 | <tbody> |
371 | <row> | 371 | <row> |
@@ -398,7 +398,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
398 | </tgroup> | 398 | </tgroup> |
399 | </table> | 399 | </table> |
400 | 400 | ||
401 | <table frame="all"><title>struct video_tuner flags</title> | 401 | <table frame="all" id="video_tuner_flags"><title>struct video_tuner flags</title> |
402 | <tgroup cols="2" align="left"> | 402 | <tgroup cols="2" align="left"> |
403 | <tbody> | 403 | <tbody> |
404 | <row> | 404 | <row> |
@@ -421,7 +421,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
421 | </tgroup> | 421 | </tgroup> |
422 | </table> | 422 | </table> |
423 | 423 | ||
424 | <table frame="all"><title>struct video_tuner modes</title> | 424 | <table frame="all" id="video_tuner_modes"><title>struct video_tuner modes</title> |
425 | <tgroup cols="2" align="left"> | 425 | <tgroup cols="2" align="left"> |
426 | <tbody> | 426 | <tbody> |
427 | <row> | 427 | <row> |
@@ -572,7 +572,7 @@ static int current_volume=0; | |||
572 | <para> | 572 | <para> |
573 | Then we fill in the video_audio structure. This has the following format | 573 | Then we fill in the video_audio structure. This has the following format |
574 | </para> | 574 | </para> |
575 | <table frame="all"><title>struct video_audio fields</title> | 575 | <table frame="all" id="video_audio_fields"><title>struct video_audio fields</title> |
576 | <tgroup cols="2" align="left"> | 576 | <tgroup cols="2" align="left"> |
577 | <tbody> | 577 | <tbody> |
578 | <row> | 578 | <row> |
@@ -607,7 +607,7 @@ static int current_volume=0; | |||
607 | </tgroup> | 607 | </tgroup> |
608 | </table> | 608 | </table> |
609 | 609 | ||
610 | <table frame="all"><title>struct video_audio flags</title> | 610 | <table frame="all" id="video_audio_flags"><title>struct video_audio flags</title> |
611 | <tgroup cols="2" align="left"> | 611 | <tgroup cols="2" align="left"> |
612 | <tbody> | 612 | <tbody> |
613 | <row> | 613 | <row> |
@@ -625,7 +625,7 @@ static int current_volume=0; | |||
625 | </tgroup> | 625 | </tgroup> |
626 | </table> | 626 | </table> |
627 | 627 | ||
628 | <table frame="all"><title>struct video_audio modes</title> | 628 | <table frame="all" id="video_audio_modes"><title>struct video_audio modes</title> |
629 | <tgroup cols="2" align="left"> | 629 | <tgroup cols="2" align="left"> |
630 | <tbody> | 630 | <tbody> |
631 | <row> | 631 | <row> |
@@ -775,7 +775,7 @@ module_exit(cleanup); | |||
775 | </para> | 775 | </para> |
776 | </sect1> | 776 | </sect1> |
777 | </chapter> | 777 | </chapter> |
778 | <chapter> | 778 | <chapter id="Video_Capture_Devices"> |
779 | <title>Video Capture Devices</title> | 779 | <title>Video Capture Devices</title> |
780 | <sect1 id="introvid"> | 780 | <sect1 id="introvid"> |
781 | <title>Video Capture Device Types</title> | 781 | <title>Video Capture Device Types</title> |
@@ -855,7 +855,7 @@ static struct video_device my_camera | |||
855 | We use the extra video capability flags that did not apply to the | 855 | We use the extra video capability flags that did not apply to the |
856 | radio interface. The video related flags are | 856 | radio interface. The video related flags are |
857 | </para> | 857 | </para> |
858 | <table frame="all"><title>Capture Capabilities</title> | 858 | <table frame="all" id="Capture_Capabilities"><title>Capture Capabilities</title> |
859 | <tgroup cols="2" align="left"> | 859 | <tgroup cols="2" align="left"> |
860 | <tbody> | 860 | <tbody> |
861 | <row> | 861 | <row> |
@@ -1195,7 +1195,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
1195 | inputs to the video card). Our example card has a single camera input. The | 1195 | inputs to the video card). Our example card has a single camera input. The |
1196 | fields in the structure are | 1196 | fields in the structure are |
1197 | </para> | 1197 | </para> |
1198 | <table frame="all"><title>struct video_channel fields</title> | 1198 | <table frame="all" id="video_channel_fields"><title>struct video_channel fields</title> |
1199 | <tgroup cols="2" align="left"> | 1199 | <tgroup cols="2" align="left"> |
1200 | <tbody> | 1200 | <tbody> |
1201 | <row> | 1201 | <row> |
@@ -1218,7 +1218,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
1218 | </tbody> | 1218 | </tbody> |
1219 | </tgroup> | 1219 | </tgroup> |
1220 | </table> | 1220 | </table> |
1221 | <table frame="all"><title>struct video_channel flags</title> | 1221 | <table frame="all" id="video_channel_flags"><title>struct video_channel flags</title> |
1222 | <tgroup cols="2" align="left"> | 1222 | <tgroup cols="2" align="left"> |
1223 | <tbody> | 1223 | <tbody> |
1224 | <row> | 1224 | <row> |
@@ -1229,7 +1229,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
1229 | </tbody> | 1229 | </tbody> |
1230 | </tgroup> | 1230 | </tgroup> |
1231 | </table> | 1231 | </table> |
1232 | <table frame="all"><title>struct video_channel types</title> | 1232 | <table frame="all" id="video_channel_types"><title>struct video_channel types</title> |
1233 | <tgroup cols="2" align="left"> | 1233 | <tgroup cols="2" align="left"> |
1234 | <tbody> | 1234 | <tbody> |
1235 | <row> | 1235 | <row> |
@@ -1242,7 +1242,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
1242 | </tbody> | 1242 | </tbody> |
1243 | </tgroup> | 1243 | </tgroup> |
1244 | </table> | 1244 | </table> |
1245 | <table frame="all"><title>struct video_channel norms</title> | 1245 | <table frame="all" id="video_channel_norms"><title>struct video_channel norms</title> |
1246 | <tgroup cols="2" align="left"> | 1246 | <tgroup cols="2" align="left"> |
1247 | <tbody> | 1247 | <tbody> |
1248 | <row> | 1248 | <row> |
@@ -1328,7 +1328,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | |||
1328 | for every other pixel in the image. The other common formats the interface | 1328 | for every other pixel in the image. The other common formats the interface |
1329 | defines are | 1329 | defines are |
1330 | </para> | 1330 | </para> |
1331 | <table frame="all"><title>Framebuffer Encodings</title> | 1331 | <table frame="all" id="Framebuffer_Encodings"><title>Framebuffer Encodings</title> |
1332 | <tgroup cols="2" align="left"> | 1332 | <tgroup cols="2" align="left"> |
1333 | <tbody> | 1333 | <tbody> |
1334 | <row> | 1334 | <row> |
@@ -1466,7 +1466,7 @@ static struct video_buffer capture_fb; | |||
1466 | display. The video_window structure is used to describe the way the image | 1466 | display. The video_window structure is used to describe the way the image |
1467 | should be displayed. | 1467 | should be displayed. |
1468 | </para> | 1468 | </para> |
1469 | <table frame="all"><title>struct video_window fields</title> | 1469 | <table frame="all" id="video_window_fields"><title>struct video_window fields</title> |
1470 | <tgroup cols="2" align="left"> | 1470 | <tgroup cols="2" align="left"> |
1471 | <tbody> | 1471 | <tbody> |
1472 | <row> | 1472 | <row> |
@@ -1503,7 +1503,7 @@ static struct video_buffer capture_fb; | |||
1503 | <para> | 1503 | <para> |
1504 | Each clip is a struct video_clip which has the following fields | 1504 | Each clip is a struct video_clip which has the following fields |
1505 | </para> | 1505 | </para> |
1506 | <table frame="all"><title>video_clip fields</title> | 1506 | <table frame="all" id="video_clip_fields"><title>video_clip fields</title> |
1507 | <tgroup cols="2" align="left"> | 1507 | <tgroup cols="2" align="left"> |
1508 | <tbody> | 1508 | <tbody> |
1509 | <row> | 1509 | <row> |
diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl index a507876447aa..42c75ba71ba2 100644 --- a/Documentation/DocBook/z8530book.tmpl +++ b/Documentation/DocBook/z8530book.tmpl | |||
@@ -77,7 +77,7 @@ | |||
77 | </para> | 77 | </para> |
78 | </chapter> | 78 | </chapter> |
79 | 79 | ||
80 | <chapter> | 80 | <chapter id="Driver_Modes"> |
81 | <title>Driver Modes</title> | 81 | <title>Driver Modes</title> |
82 | <para> | 82 | <para> |
83 | The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices | 83 | The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices |
@@ -108,7 +108,7 @@ | |||
108 | </para> | 108 | </para> |
109 | </chapter> | 109 | </chapter> |
110 | 110 | ||
111 | <chapter> | 111 | <chapter id="Using_the_Z85230_driver"> |
112 | <title>Using the Z85230 driver</title> | 112 | <title>Using the Z85230 driver</title> |
113 | <para> | 113 | <para> |
114 | The Z85230 driver provides the back end interface to your board. To | 114 | The Z85230 driver provides the back end interface to your board. To |
@@ -174,7 +174,7 @@ | |||
174 | </para> | 174 | </para> |
175 | </chapter> | 175 | </chapter> |
176 | 176 | ||
177 | <chapter> | 177 | <chapter id="Attaching_Network_Interfaces"> |
178 | <title>Attaching Network Interfaces</title> | 178 | <title>Attaching Network Interfaces</title> |
179 | <para> | 179 | <para> |
180 | If you wish to use the network interface facilities of the driver, | 180 | If you wish to use the network interface facilities of the driver, |
@@ -216,7 +216,7 @@ | |||
216 | </para> | 216 | </para> |
217 | </chapter> | 217 | </chapter> |
218 | 218 | ||
219 | <chapter> | 219 | <chapter id="Configuring_And_Activating_The_Port"> |
220 | <title>Configuring And Activating The Port</title> | 220 | <title>Configuring And Activating The Port</title> |
221 | <para> | 221 | <para> |
222 | The Z85230 driver provides helper functions and tables to load the | 222 | The Z85230 driver provides helper functions and tables to load the |
@@ -300,7 +300,7 @@ | |||
300 | </para> | 300 | </para> |
301 | </chapter> | 301 | </chapter> |
302 | 302 | ||
303 | <chapter> | 303 | <chapter id="Network_Layer_Functions"> |
304 | <title>Network Layer Functions</title> | 304 | <title>Network Layer Functions</title> |
305 | <para> | 305 | <para> |
306 | The Z8530 layer provides functions to queue packets for | 306 | The Z8530 layer provides functions to queue packets for |
@@ -327,7 +327,7 @@ | |||
327 | </para> | 327 | </para> |
328 | </chapter> | 328 | </chapter> |
329 | 329 | ||
330 | <chapter> | 330 | <chapter id="Porting_The_Z8530_Driver"> |
331 | <title>Porting The Z8530 Driver</title> | 331 | <title>Porting The Z8530 Driver</title> |
332 | <para> | 332 | <para> |
333 | The Z8530 driver is written to be portable. In DMA mode it makes | 333 | The Z8530 driver is written to be portable. In DMA mode it makes |
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 98a26f81fa75..42d7c4cb39cd 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt | |||
@@ -456,7 +456,7 @@ methods are create/destroy. Any others that are null are presumed to | |||
456 | be successful no-ops. | 456 | be successful no-ops. |
457 | 457 | ||
458 | struct cgroup_subsys_state *create(struct cgroup *cont) | 458 | struct cgroup_subsys_state *create(struct cgroup *cont) |
459 | LL=cgroup_mutex | 459 | (cgroup_mutex held by caller) |
460 | 460 | ||
461 | Called to create a subsystem state object for a cgroup. The | 461 | Called to create a subsystem state object for a cgroup. The |
462 | subsystem should allocate its subsystem state object for the passed | 462 | subsystem should allocate its subsystem state object for the passed |
@@ -471,14 +471,19 @@ it's the root of the hierarchy) and may be an appropriate place for | |||
471 | initialization code. | 471 | initialization code. |
472 | 472 | ||
473 | void destroy(struct cgroup *cont) | 473 | void destroy(struct cgroup *cont) |
474 | LL=cgroup_mutex | 474 | (cgroup_mutex held by caller) |
475 | 475 | ||
476 | The cgroup system is about to destroy the passed cgroup; the | 476 | The cgroup system is about to destroy the passed cgroup; the subsystem |
477 | subsystem should do any necessary cleanup | 477 | should do any necessary cleanup and free its subsystem state |
478 | object. By the time this method is called, the cgroup has already been | ||
479 | unlinked from the file system and from the child list of its parent; | ||
480 | cgroup->parent is still valid. (Note - can also be called for a | ||
481 | newly-created cgroup if an error occurs after this subsystem's | ||
482 | create() method has been called for the new cgroup). | ||
478 | 483 | ||
479 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 484 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
480 | struct task_struct *task) | 485 | struct task_struct *task) |
481 | LL=cgroup_mutex | 486 | (cgroup_mutex held by caller) |
482 | 487 | ||
483 | Called prior to moving a task into a cgroup; if the subsystem | 488 | Called prior to moving a task into a cgroup; if the subsystem |
484 | returns an error, this will abort the attach operation. If a NULL | 489 | returns an error, this will abort the attach operation. If a NULL |
@@ -489,25 +494,20 @@ remain valid while the caller holds cgroup_mutex. | |||
489 | 494 | ||
490 | void attach(struct cgroup_subsys *ss, struct cgroup *cont, | 495 | void attach(struct cgroup_subsys *ss, struct cgroup *cont, |
491 | struct cgroup *old_cont, struct task_struct *task) | 496 | struct cgroup *old_cont, struct task_struct *task) |
492 | LL=cgroup_mutex | ||
493 | |||
494 | 497 | ||
495 | Called after the task has been attached to the cgroup, to allow any | 498 | Called after the task has been attached to the cgroup, to allow any |
496 | post-attachment activity that requires memory allocations or blocking. | 499 | post-attachment activity that requires memory allocations or blocking. |
497 | 500 | ||
498 | void fork(struct cgroup_subsys *ss, struct task_struct *task) | 501 | void fork(struct cgroup_subsys *ss, struct task_struct *task) |
499 | LL=callback_mutex, maybe read_lock(tasklist_lock) | ||
500 | 502 | ||
501 | Called when a task is forked into a cgroup. Also called during | 503 | Called when a task is forked into a cgroup. Also called during |
502 | registration for all existing tasks. | 504 | registration for all existing tasks. |
503 | 505 | ||
504 | void exit(struct cgroup_subsys *ss, struct task_struct *task) | 506 | void exit(struct cgroup_subsys *ss, struct task_struct *task) |
505 | LL=callback_mutex | ||
506 | 507 | ||
507 | Called during task exit | 508 | Called during task exit |
508 | 509 | ||
509 | int populate(struct cgroup_subsys *ss, struct cgroup *cont) | 510 | int populate(struct cgroup_subsys *ss, struct cgroup *cont) |
510 | LL=none | ||
511 | 511 | ||
512 | Called after creation of a cgroup to allow a subsystem to populate | 512 | Called after creation of a cgroup to allow a subsystem to populate |
513 | the cgroup directory with file entries. The subsystem should make | 513 | the cgroup directory with file entries. The subsystem should make |
@@ -524,7 +524,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set | |||
524 | up. | 524 | up. |
525 | 525 | ||
526 | void bind(struct cgroup_subsys *ss, struct cgroup *root) | 526 | void bind(struct cgroup_subsys *ss, struct cgroup *root) |
527 | LL=callback_mutex | 527 | (cgroup_mutex held by caller) |
528 | 528 | ||
529 | Called when a cgroup subsystem is rebound to a different hierarchy | 529 | Called when a cgroup subsystem is rebound to a different hierarchy |
530 | and root cgroup. Currently this will only involve movement between | 530 | and root cgroup. Currently this will only involve movement between |
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt new file mode 100644 index 000000000000..b5bbea92a61a --- /dev/null +++ b/Documentation/controllers/memory.txt | |||
@@ -0,0 +1,279 @@ | |||
1 | Memory Controller | ||
2 | |||
3 | Salient features | ||
4 | |||
5 | a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages | ||
6 | b. The infrastructure allows easy addition of other types of memory to control | ||
7 | c. Provides *zero overhead* for non memory controller users | ||
8 | d. Provides a double LRU: global memory pressure causes reclaim from the | ||
9 | global LRU; a cgroup on hitting a limit, reclaims from the per | ||
10 | cgroup LRU | ||
11 | |||
12 | NOTE: Swap Cache (unmapped) is not accounted now. | ||
13 | |||
14 | Benefits and Purpose of the memory controller | ||
15 | |||
16 | The memory controller isolates the memory behaviour of a group of tasks | ||
17 | from the rest of the system. The article on LWN [12] mentions some probable | ||
18 | uses of the memory controller. The memory controller can be used to | ||
19 | |||
20 | a. Isolate an application or a group of applications | ||
21 | Memory hungry applications can be isolated and limited to a smaller | ||
22 | amount of memory. | ||
23 | b. Create a cgroup with a limited amount of memory; this can be used | ||
24 | as a good alternative to booting with mem=XXXX. | ||
25 | c. Virtualization solutions can control the amount of memory they want | ||
26 | to assign to a virtual machine instance. | ||
27 | d. A CD/DVD burner could control the amount of memory used by the | ||
28 | rest of the system to ensure that burning does not fail due to lack | ||
29 | of available memory. | ||
30 | e. There are several other use cases, find one or use the controller just | ||
31 | for fun (to learn and hack on the VM subsystem). | ||
32 | |||
33 | 1. History | ||
34 | |||
35 | The memory controller has a long history. A request for comments for the memory | ||
36 | controller was posted by Balbir Singh [1]. At the time the RFC was posted | ||
37 | there were several implementations for memory control. The goal of the | ||
38 | RFC was to build consensus and agreement for the minimal features required | ||
39 | for memory control. The first RSS controller was posted by Balbir Singh[2] | ||
40 | in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the | ||
41 | RSS controller. At OLS, at the resource management BoF, everyone suggested | ||
42 | that we handle both page cache and RSS together. Another request was raised | ||
43 | to allow user space handling of OOM. The current memory controller is | ||
44 | at version 6; it combines both mapped (RSS) and unmapped Page | ||
45 | Cache Control [11]. | ||
46 | |||
47 | 2. Memory Control | ||
48 | |||
49 | Memory is a unique resource in the sense that it is present in a limited | ||
50 | amount. If a task requires a lot of CPU processing, the task can spread | ||
51 | its processing over a period of hours, days, months or years, but with | ||
52 | memory, the same physical memory needs to be reused to accomplish the task. | ||
53 | |||
54 | The memory controller implementation has been divided into phases. These | ||
55 | are: | ||
56 | |||
57 | 1. Memory controller | ||
58 | 2. mlock(2) controller | ||
59 | 3. Kernel user memory accounting and slab control | ||
60 | 4. user mappings length controller | ||
61 | |||
62 | The memory controller is the first controller developed. | ||
63 | |||
64 | 2.1. Design | ||
65 | |||
66 | The core of the design is a counter called the res_counter. The res_counter | ||
67 | tracks the current memory usage and limit of the group of processes associated | ||
68 | with the controller. Each cgroup has a memory controller specific data | ||
69 | structure (mem_cgroup) associated with it. | ||
70 | |||
71 | 2.2. Accounting | ||
72 | |||
73 | +--------------------+ | ||
74 | | mem_cgroup | | ||
75 | | (res_counter) | | ||
76 | +--------------------+ | ||
77 | / ^ \ | ||
78 | / | \ | ||
79 | +---------------+ | +---------------+ | ||
80 | | mm_struct | |.... | mm_struct | | ||
81 | | | | | | | ||
82 | +---------------+ | +---------------+ | ||
83 | | | ||
84 | + --------------+ | ||
85 | | | ||
86 | +---------------+ +------+--------+ | ||
87 | | page +----------> page_cgroup| | ||
88 | | | | | | ||
89 | +---------------+ +---------------+ | ||
90 | |||
91 | (Figure 1: Hierarchy of Accounting) | ||
92 | |||
93 | |||
94 | Figure 1 shows the important aspects of the controller | ||
95 | |||
96 | 1. Accounting happens per cgroup | ||
97 | 2. Each mm_struct knows about which cgroup it belongs to | ||
98 | 3. Each page has a pointer to the page_cgroup, which in turn knows the | ||
99 | cgroup it belongs to | ||
100 | |||
101 | The accounting is done as follows: mem_cgroup_charge() is invoked to setup | ||
102 | the necessary data structures and check if the cgroup that is being charged | ||
103 | is over its limit. If it is then reclaim is invoked on the cgroup. | ||
104 | More details can be found in the reclaim section of this document. | ||
105 | If everything goes well, a page meta-data-structure called page_cgroup is | ||
106 | allocated and associated with the page. This routine also adds the page to | ||
107 | the per cgroup LRU. | ||
108 | |||
109 | 2.2.1 Accounting details | ||
110 | |||
111 | All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted. | ||
112 | RSS pages are accounted at the time of page_add_*_rmap() unless they've already | ||
113 | been accounted for earlier. A file page will be accounted for as Page Cache; | ||
114 | when it's mapped into the page tables of a process, duplicate accounting is carefully | ||
115 | avoided. Page Cache pages are accounted at the time of add_to_page_cache(). | ||
116 | The corresponding routines that remove a page from the page tables or remove | ||
117 | a page from Page Cache are used to decrement the accounting counters of the | ||
118 | cgroup. | ||
119 | |||
120 | 2.3 Shared Page Accounting | ||
121 | |||
122 | Shared pages are accounted on the basis of the first touch approach. The | ||
123 | cgroup that first touches a page is accounted for the page. The principle | ||
124 | behind this approach is that a cgroup that aggressively uses a shared | ||
125 | page will eventually get charged for it (once it is uncharged from | ||
126 | the cgroup that brought it in -- this will happen on memory pressure). | ||
127 | |||
128 | 2.4 Reclaim | ||
129 | |||
130 | Each cgroup maintains a per cgroup LRU that consists of an active | ||
131 | and inactive list. When a cgroup goes over its limit, we first try | ||
132 | to reclaim memory from the cgroup so as to make space for the new | ||
133 | pages that the cgroup has touched. If the reclaim is unsuccessful, | ||
134 | an OOM routine is invoked to select and kill the bulkiest task in the | ||
135 | cgroup. | ||
136 | |||
137 | The reclaim algorithm has not been modified for cgroups, except that | ||
138 | pages that are selected for reclaiming come from the per cgroup LRU | ||
139 | list. | ||
140 | |||
141 | 2.5 Locking | |
142 | |||
143 | The memory controller uses the following hierarchy | ||
144 | |||
145 | 1. zone->lru_lock is used for selecting pages to be isolated | ||
146 | 2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) | ||
147 | 3. lock_page_cgroup() is used to protect page->page_cgroup | ||
148 | |||
149 | 3. User Interface | ||
150 | |||
151 | 0. Configuration | ||
152 | |||
153 | a. Enable CONFIG_CGROUPS | ||
154 | b. Enable CONFIG_RESOURCE_COUNTERS | ||
155 | c. Enable CONFIG_CGROUP_MEM_CONT | ||
156 | |||
157 | 1. Prepare the cgroups | ||
158 | # mkdir -p /cgroups | ||
159 | # mount -t cgroup none /cgroups -o memory | ||
160 | |||
161 | 2. Make the new group and move bash into it | ||
162 | # mkdir /cgroups/0 | ||
163 | # echo $$ > /cgroups/0/tasks | ||
164 | |||
165 | Since now we're in the 0 cgroup, | ||
166 | We can alter the memory limit: | ||
167 | # echo -n 4M > /cgroups/0/memory.limit_in_bytes | ||
168 | |||
169 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, | ||
170 | mega or gigabytes. | ||
171 | |||
172 | # cat /cgroups/0/memory.limit_in_bytes | ||
173 | 4194304 Bytes | ||
174 | |||
175 | NOTE: The interface has now changed to display the usage in bytes | ||
176 | instead of pages | ||
177 | |||
178 | We can check the usage: | ||
179 | # cat /cgroups/0/memory.usage_in_bytes | ||
180 | 1216512 Bytes | ||
181 | |||
182 | A successful write to this file does not guarantee a successful set of | ||
183 | this limit to the value written into the file. This can be due to a | ||
184 | number of factors, such as rounding up to page boundaries or the total | ||
185 | availability of memory on the system. The user is required to re-read | ||
186 | this file after a write to guarantee the value committed by the kernel. | ||
187 | |||
188 | # echo -n 1 > memory.limit_in_bytes | ||
189 | # cat memory.limit_in_bytes | ||
190 | 4096 Bytes | ||
191 | |||
192 | The memory.failcnt field gives the number of times that the cgroup limit was | ||
193 | exceeded. | ||
194 | |||
195 | The memory.stat file gives accounting information. Now, the number of | ||
196 | caches, RSS and Active pages/Inactive pages are shown. | ||
197 | |||
198 | The memory.force_empty gives an interface to drop *all* charges by force. | ||
199 | |||
200 | # echo -n 1 > memory.force_empty | ||
201 | |||
202 | will drop all charges in cgroup. Currently, this is maintained for test. | ||
203 | |||
204 | 4. Testing | ||
205 | |||
206 | Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. | ||
207 | Apart from that v6 has been tested with several applications and regular | ||
208 | daily use. The controller has also been tested on the PPC64, x86_64 and | ||
209 | UML platforms. | ||
210 | |||
211 | 4.1 Troubleshooting | ||
212 | |||
213 | Sometimes a user might find that the application under a cgroup is | ||
214 | terminated. There are several causes for this: | ||
215 | |||
216 | 1. The cgroup limit is too low (just too low to do anything useful) | ||
217 | 2. The user is using anonymous memory and swap is turned off or too low | ||
218 | |||
219 | A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of | ||
220 | some of the pages cached in the cgroup (page cache pages). | ||
221 | |||
222 | 4.2 Task migration | ||
223 | |||
224 | When a task migrates from one cgroup to another, its charge is not | |
225 | carried forward. The pages allocated from the original cgroup still | ||
226 | remain charged to it; the charge is dropped when the page is freed or | |
227 | reclaimed. | ||
228 | |||
229 | 4.3 Removing a cgroup | ||
230 | |||
231 | A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a | ||
232 | cgroup might have some charge associated with it, even though all | ||
233 | tasks have migrated away from it. Such charges are automatically dropped at | ||
234 | rmdir() if there are no tasks. | ||
235 | |||
236 | 4.4 Choosing what to account -- Page Cache (unmapped) vs RSS (mapped)? | ||
237 | |||
238 | The type of memory accounted by the cgroup can be limited to just | ||
239 | mapped pages by writing "1" to memory.control_type field | ||
240 | |||
241 | echo -n 1 > memory.control_type | ||
242 | |||
243 | 5. TODO | ||
244 | |||
245 | 1. Add support for accounting huge pages (as a separate controller) | ||
246 | 2. Make per-cgroup scanner reclaim not-shared pages first | ||
247 | 3. Teach controller to account for shared-pages | ||
248 | 4. Start reclamation when the limit is lowered | ||
249 | 5. Start reclamation in the background when the limit is | ||
250 | not yet hit but the usage is getting closer | ||
251 | |||
252 | Summary | ||
253 | |||
254 | Overall, the memory controller has been a stable controller and has been | ||
255 | commented and discussed quite extensively in the community. | ||
256 | |||
257 | References | ||
258 | |||
259 | 1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ | ||
260 | 2. Singh, Balbir. Memory Controller (RSS Control), | ||
261 | http://lwn.net/Articles/222762/ | ||
262 | 3. Emelianov, Pavel. Resource controllers based on process cgroups | ||
263 | http://lkml.org/lkml/2007/3/6/198 | ||
264 | 4. Emelianov, Pavel. RSS controller based on process cgroups (v2) | ||
265 | http://lkml.org/lkml/2007/4/9/74 | ||
266 | 5. Emelianov, Pavel. RSS controller based on process cgroups (v3) | ||
267 | http://lkml.org/lkml/2007/5/30/244 | ||
268 | 6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ | ||
269 | 7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control | ||
270 | subsystem (v3), http://lwn.net/Articles/235534/ | ||
271 | 8. Singh, Balbir. RSS controller V2 test results (lmbench), | ||
272 | http://lkml.org/lkml/2007/5/17/232 | ||
273 | 9. Singh, Balbir. RSS controller V2 AIM9 results | ||
274 | http://lkml.org/lkml/2007/5/18/1 | ||
275 | 10. Singh, Balbir. Memory controller v6 results, | ||
276 | http://lkml.org/lkml/2007/8/19/36 | ||
277 | 11. Singh, Balbir. Memory controller v6, http://lkml.org/lkml/2007/8/17/69 | ||
278 | 12. Corbet, Jonathan, Controlling memory use in cgroups, | ||
279 | http://lwn.net/Articles/243795/ | ||
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 141bef1c8599..43db6fe12814 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt | |||
@@ -523,21 +523,14 @@ from one cpuset to another, then the kernel will adjust the tasks | |||
523 | memory placement, as above, the next time that the kernel attempts | 523 | memory placement, as above, the next time that the kernel attempts |
524 | to allocate a page of memory for that task. | 524 | to allocate a page of memory for that task. |
525 | 525 | ||
526 | If a cpuset has its CPUs modified, then each task using that | 526 | If a cpuset has its 'cpus' modified, then each task in that cpuset |
527 | cpuset does _not_ change its behavior automatically. In order to | 527 | will have its allowed CPU placement changed immediately. Similarly, |
528 | minimize the impact on the critical scheduling code in the kernel, | 528 | if a task's pid is written to a cpuset's 'tasks' file, in either its |
529 | tasks will continue to use their prior CPU placement until they | 529 | current cpuset or another cpuset, then its allowed CPU placement is |
530 | are rebound to their cpuset, by rewriting their pid to the 'tasks' | 530 | changed immediately. If such a task had been bound to some subset |
531 | file of their cpuset. If a task had been bound to some subset of its | 531 | of its cpuset using the sched_setaffinity() call, the task will be |
532 | cpuset using the sched_setaffinity() call, and if any of that subset | 532 | allowed to run on any CPU allowed in its new cpuset, negating the |
533 | is still allowed in its new cpuset settings, then the task will be | 533 | effect of the prior sched_setaffinity() call. |
534 | restricted to the intersection of the CPUs it was allowed on before, | ||
535 | and its new cpuset CPU placement. If, on the other hand, there is | ||
536 | no overlap between a tasks prior placement and its new cpuset CPU | ||
537 | placement, then the task will be allowed to run on any CPU allowed | ||
538 | in its new cpuset. If a task is moved from one cpuset to another, | ||
539 | its CPU placement is updated in the same way as if the tasks pid is | ||
540 | rewritten to the 'tasks' file of its current cpuset. | ||
541 | 534 | ||
542 | In summary, the memory placement of a task whose cpuset is changed is | 535 | In summary, the memory placement of a task whose cpuset is changed is |
543 | updated by the kernel, on the next allocation of a page for that task, | 536 | updated by the kernel, on the next allocation of a page for that task, |
diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/edac.txt index a5c36842ecef..a5c36842ecef 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/edac.txt | |||
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index 113165b48305..2ebb94d6ed8e 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt | |||
@@ -170,7 +170,6 @@ Sylpheed (GUI) | |||
170 | 170 | ||
171 | - Works well for inlining text (or using attachments). | 171 | - Works well for inlining text (or using attachments). |
172 | - Allows use of an external editor. | 172 | - Allows use of an external editor. |
173 | - Not good for IMAP. | ||
174 | - Is slow on large folders. | 173 | - Is slow on large folders. |
175 | - Won't do TLS SMTP auth over a non-SSL connection. | 174 | - Won't do TLS SMTP auth over a non-SSL connection. |
176 | - Has a helpful ruler bar in the compose window. | 175 | - Has a helpful ruler bar in the compose window. |
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt index 63883a892120..748328370250 100644 --- a/Documentation/fb/deferred_io.txt +++ b/Documentation/fb/deferred_io.txt | |||
@@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup | |||
7 | works: | 7 | works: |
8 | 8 | ||
9 | - userspace app like Xfbdev mmaps framebuffer | 9 | - userspace app like Xfbdev mmaps framebuffer |
10 | - deferred IO and driver sets up nopage and page_mkwrite handlers | 10 | - deferred IO and driver sets up fault and page_mkwrite handlers |
11 | - userspace app tries to write to mmaped vaddress | 11 | - userspace app tries to write to mmaped vaddress |
12 | - we get pagefault and reach nopage handler | 12 | - we get pagefault and reach fault handler |
13 | - nopage handler finds and returns physical page | 13 | - fault handler finds and returns physical page |
14 | - we get page_mkwrite where we add this page to a list | 14 | - we get page_mkwrite where we add this page to a list |
15 | - schedule a workqueue task to be run after a delay | 15 | - schedule a workqueue task to be run after a delay |
16 | - app continues writing to that page with no additional cost. this is | 16 | - app continues writing to that page with no additional cost. this is |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index a7d9d179131a..17b1659bd3f8 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -6,14 +6,6 @@ be removed from this file. | |||
6 | 6 | ||
7 | --------------------------- | 7 | --------------------------- |
8 | 8 | ||
9 | What: MXSER | ||
10 | When: December 2007 | ||
11 | Why: Old mxser driver is obsoleted by the mxser_new. Give it some time yet | ||
12 | and remove it. | ||
13 | Who: Jiri Slaby <jirislaby@gmail.com> | ||
14 | |||
15 | --------------------------- | ||
16 | |||
17 | What: dev->power.power_state | 9 | What: dev->power.power_state |
18 | When: July 2007 | 10 | When: July 2007 |
19 | Why: Broken design for runtime control over driver power states, confusing | 11 | Why: Broken design for runtime control over driver power states, confusing |
@@ -208,13 +200,6 @@ Who: Randy Dunlap <randy.dunlap@oracle.com> | |||
208 | 200 | ||
209 | --------------------------- | 201 | --------------------------- |
210 | 202 | ||
211 | What: drivers depending on OSS_OBSOLETE | ||
212 | When: options in 2.6.23, code in 2.6.25 | ||
213 | Why: obsolete OSS drivers | ||
214 | Who: Adrian Bunk <bunk@stusta.de> | ||
215 | |||
216 | --------------------------- | ||
217 | |||
218 | What: libata spindown skipping and warning | 203 | What: libata spindown skipping and warning |
219 | When: Dec 2008 | 204 | When: Dec 2008 |
220 | Why: Some halt(8) implementations synchronize caches for and spin | 205 | Why: Some halt(8) implementations synchronize caches for and spin |
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 1de155e2dc36..e68021c08fbd 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -32,6 +32,8 @@ directory-locking | |||
32 | - info about the locking scheme used for directory operations. | 32 | - info about the locking scheme used for directory operations. |
33 | dlmfs.txt | 33 | dlmfs.txt |
34 | - info on the userspace interface to the OCFS2 DLM. | 34 | - info on the userspace interface to the OCFS2 DLM. |
35 | dnotify.txt | ||
36 | - info about directory notification in Linux. | ||
35 | ecryptfs.txt | 37 | ecryptfs.txt |
36 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. | 38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
37 | ext2.txt | 39 | ext2.txt |
@@ -80,6 +82,8 @@ relay.txt | |||
80 | - info on relay, for efficient streaming from kernel to user space. | 82 | - info on relay, for efficient streaming from kernel to user space. |
81 | romfs.txt | 83 | romfs.txt |
82 | - description of the ROMFS filesystem. | 84 | - description of the ROMFS filesystem. |
85 | sharedsubtree.txt | ||
86 | - a description of shared subtrees for namespaces. | ||
83 | smbfs.txt | 87 | smbfs.txt |
84 | - info on using filesystems with the SMB protocol (Win 3.11 and NT). | 88 | - info on using filesystems with the SMB protocol (Win 3.11 and NT). |
85 | spufs.txt | 89 | spufs.txt |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 37c10cba7177..42d4b30b1045 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -90,7 +90,6 @@ of the locking scheme for directory operations. | |||
90 | prototypes: | 90 | prototypes: |
91 | struct inode *(*alloc_inode)(struct super_block *sb); | 91 | struct inode *(*alloc_inode)(struct super_block *sb); |
92 | void (*destroy_inode)(struct inode *); | 92 | void (*destroy_inode)(struct inode *); |
93 | void (*read_inode) (struct inode *); | ||
94 | void (*dirty_inode) (struct inode *); | 93 | void (*dirty_inode) (struct inode *); |
95 | int (*write_inode) (struct inode *, int); | 94 | int (*write_inode) (struct inode *, int); |
96 | void (*put_inode) (struct inode *); | 95 | void (*put_inode) (struct inode *); |
@@ -114,7 +113,6 @@ locking rules: | |||
114 | BKL s_lock s_umount | 113 | BKL s_lock s_umount |
115 | alloc_inode: no no no | 114 | alloc_inode: no no no |
116 | destroy_inode: no | 115 | destroy_inode: no |
117 | read_inode: no (see below) | ||
118 | dirty_inode: no (must not sleep) | 116 | dirty_inode: no (must not sleep) |
119 | write_inode: no | 117 | write_inode: no |
120 | put_inode: no | 118 | put_inode: no |
@@ -133,7 +131,6 @@ show_options: no (vfsmount->sem) | |||
133 | quota_read: no no no (see below) | 131 | quota_read: no no no (see below) |
134 | quota_write: no no no (see below) | 132 | quota_write: no no no (see below) |
135 | 133 | ||
136 | ->read_inode() is not a method - it's a callback used in iget(). | ||
137 | ->remount_fs() will have the s_umount lock if it's already mounted. | 134 | ->remount_fs() will have the s_umount lock if it's already mounted. |
138 | When called from get_sb_single, it does NOT have the s_umount lock. | 135 | When called from get_sb_single, it does NOT have the s_umount lock. |
139 | ->quota_read() and ->quota_write() functions are both guaranteed to | 136 | ->quota_read() and ->quota_write() functions are both guaranteed to |
diff --git a/Documentation/dnotify.txt b/Documentation/filesystems/dnotify.txt index 6984fca6002a..9f5d338ddbb8 100644 --- a/Documentation/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt | |||
@@ -69,24 +69,24 @@ Example | |||
69 | #include <signal.h> | 69 | #include <signal.h> |
70 | #include <stdio.h> | 70 | #include <stdio.h> |
71 | #include <unistd.h> | 71 | #include <unistd.h> |
72 | 72 | ||
73 | static volatile int event_fd; | 73 | static volatile int event_fd; |
74 | 74 | ||
75 | static void handler(int sig, siginfo_t *si, void *data) | 75 | static void handler(int sig, siginfo_t *si, void *data) |
76 | { | 76 | { |
77 | event_fd = si->si_fd; | 77 | event_fd = si->si_fd; |
78 | } | 78 | } |
79 | 79 | ||
80 | int main(void) | 80 | int main(void) |
81 | { | 81 | { |
82 | struct sigaction act; | 82 | struct sigaction act; |
83 | int fd; | 83 | int fd; |
84 | 84 | ||
85 | act.sa_sigaction = handler; | 85 | act.sa_sigaction = handler; |
86 | sigemptyset(&act.sa_mask); | 86 | sigemptyset(&act.sa_mask); |
87 | act.sa_flags = SA_SIGINFO; | 87 | act.sa_flags = SA_SIGINFO; |
88 | sigaction(SIGRTMIN + 1, &act, NULL); | 88 | sigaction(SIGRTMIN + 1, &act, NULL); |
89 | 89 | ||
90 | fd = open(".", O_RDONLY); | 90 | fd = open(".", O_RDONLY); |
91 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); | 91 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); |
92 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); | 92 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0f33c77bc14b..92b888d540a6 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -34,8 +34,8 @@ FOO_I(inode) (see in-tree filesystems for examples). | |||
34 | 34 | ||
35 | Make them ->alloc_inode and ->destroy_inode in your super_operations. | 35 | Make them ->alloc_inode and ->destroy_inode in your super_operations. |
36 | 36 | ||
37 | Keep in mind that now you need explicit initialization of private data - | 37 | Keep in mind that now you need explicit initialization of private data |
38 | typically in ->read_inode() and after getting an inode from new_inode(). | 38 | typically between calling iget_locked() and unlocking the inode. |
39 | 39 | ||
40 | At some point that will become mandatory. | 40 | At some point that will become mandatory. |
41 | 41 | ||
@@ -173,10 +173,10 @@ should be a non-blocking function that initializes those parts of a | |||
173 | newly created inode to allow the test function to succeed. 'data' is | 173 | newly created inode to allow the test function to succeed. 'data' is |
174 | passed as an opaque value to both test and set functions. | 174 | passed as an opaque value to both test and set functions. |
175 | 175 | ||
176 | When the inode has been created by iget5_locked(), it will be returned with | 176 | When the inode has been created by iget5_locked(), it will be returned with the |
177 | the I_NEW flag set and will still be locked. read_inode has not been | 177 | I_NEW flag set and will still be locked. The filesystem then needs to finalize |
178 | called so the file system still has to finalize the initialization. Once | 178 | the initialization. Once the inode is initialized it must be unlocked by |
179 | the inode is initialized it must be unlocked by calling unlock_new_inode(). | 179 | calling unlock_new_inode(). |
180 | 180 | ||
181 | The filesystem is responsible for setting (and possibly testing) i_ino | 181 | The filesystem is responsible for setting (and possibly testing) i_ino |
182 | when appropriate. There is also a simpler iget_locked function that | 182 | when appropriate. There is also a simpler iget_locked function that |
@@ -184,11 +184,19 @@ just takes the superblock and inode number as arguments and does the | |||
184 | test and set for you. | 184 | test and set for you. |
185 | 185 | ||
186 | e.g. | 186 | e.g. |
187 | inode = iget_locked(sb, ino); | 187 | inode = iget_locked(sb, ino); |
188 | if (inode->i_state & I_NEW) { | 188 | if (inode->i_state & I_NEW) { |
189 | read_inode_from_disk(inode); | 189 | err = read_inode_from_disk(inode); |
190 | unlock_new_inode(inode); | 190 | if (err < 0) { |
191 | } | 191 | iget_failed(inode); |
192 | return err; | ||
193 | } | ||
194 | unlock_new_inode(inode); | ||
195 | } | ||
196 | |||
197 | Note that if the process of setting up a new inode fails, then iget_failed() | ||
198 | should be called on the inode to render it dead, and an appropriate error | ||
199 | should be passed back to the caller. | ||
192 | 200 | ||
193 | --- | 201 | --- |
194 | [recommended] | 202 | [recommended] |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e2799b5fafea..5681e2fa1496 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1029,6 +1029,14 @@ nr_inodes | |||
1029 | Denotes the number of inodes the system has allocated. This number will | 1029 | Denotes the number of inodes the system has allocated. This number will |
1030 | grow and shrink dynamically. | 1030 | grow and shrink dynamically. |
1031 | 1031 | ||
1032 | nr_open | ||
1033 | ------- | ||
1034 | |||
1035 | Denotes the maximum number of file-handles a process can | ||
1036 | allocate. Default value is 1024*1024 (1048576) which should be | ||
1037 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
1038 | resource limit. | ||
1039 | |||
1032 | nr_free_inodes | 1040 | nr_free_inodes |
1033 | -------------- | 1041 | -------------- |
1034 | 1042 | ||
diff --git a/Documentation/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc7..736540045dc7 100644 --- a/Documentation/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 9d019d35728f..bd55038b56f5 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -203,8 +203,6 @@ struct super_operations { | |||
203 | struct inode *(*alloc_inode)(struct super_block *sb); | 203 | struct inode *(*alloc_inode)(struct super_block *sb); |
204 | void (*destroy_inode)(struct inode *); | 204 | void (*destroy_inode)(struct inode *); |
205 | 205 | ||
206 | void (*read_inode) (struct inode *); | ||
207 | |||
208 | void (*dirty_inode) (struct inode *); | 206 | void (*dirty_inode) (struct inode *); |
209 | int (*write_inode) (struct inode *, int); | 207 | int (*write_inode) (struct inode *, int); |
210 | void (*put_inode) (struct inode *); | 208 | void (*put_inode) (struct inode *); |
@@ -242,15 +240,6 @@ or bottom half). | |||
242 | ->alloc_inode was defined and simply undoes anything done by | 240 | ->alloc_inode was defined and simply undoes anything done by |
243 | ->alloc_inode. | 241 | ->alloc_inode. |
244 | 242 | ||
245 | read_inode: this method is called to read a specific inode from the | ||
246 | mounted filesystem. The i_ino member in the struct inode is | ||
247 | initialized by the VFS to indicate which inode to read. Other | ||
248 | members are filled in by this method. | ||
249 | |||
250 | You can set this to NULL and use iget5_locked() instead of iget() | ||
251 | to read inodes. This is necessary for filesystems for which the | ||
252 | inode number is not sufficient to identify an inode. | ||
253 | |||
254 | dirty_inode: this method is called by the VFS to mark an inode dirty. | 243 | dirty_inode: this method is called by the VFS to mark an inode dirty. |
255 | 244 | ||
256 | write_inode: this method is called when the VFS needs to write an | 245 | write_inode: this method is called when the VFS needs to write an |
@@ -308,9 +297,9 @@ or bottom half). | |||
308 | 297 | ||
309 | quota_write: called by the VFS to write to filesystem quota file. | 298 | quota_write: called by the VFS to write to filesystem quota file. |
310 | 299 | ||
311 | The read_inode() method is responsible for filling in the "i_op" | 300 | Whoever sets up the inode is responsible for filling in the "i_op" field. This |
312 | field. This is a pointer to a "struct inode_operations" which | 301 | is a pointer to a "struct inode_operations" which describes the methods that |
313 | describes the methods that can be performed on individual inodes. | 302 | can be performed on individual inodes. |
314 | 303 | ||
315 | 304 | ||
316 | The Inode Object | 305 | The Inode Object |
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 53a63890aea4..30c101761d0d 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -96,7 +96,9 @@ or in registers (e.g., for x86_64 or for an i386 fastcall function). | |||
96 | The jprobe will work in either case, so long as the handler's | 96 | The jprobe will work in either case, so long as the handler's |
97 | prototype matches that of the probed function. | 97 | prototype matches that of the probed function. |
98 | 98 | ||
99 | 1.3 How Does a Return Probe Work? | 99 | 1.3 Return Probes |
100 | |||
101 | 1.3.1 How Does a Return Probe Work? | ||
100 | 102 | ||
101 | When you call register_kretprobe(), Kprobes establishes a kprobe at | 103 | When you call register_kretprobe(), Kprobes establishes a kprobe at |
102 | the entry to the function. When the probed function is called and this | 104 | the entry to the function. When the probed function is called and this |
@@ -107,9 +109,9 @@ At boot time, Kprobes registers a kprobe at the trampoline. | |||
107 | 109 | ||
108 | When the probed function executes its return instruction, control | 110 | When the probed function executes its return instruction, control |
109 | passes to the trampoline and that probe is hit. Kprobes' trampoline | 111 | passes to the trampoline and that probe is hit. Kprobes' trampoline |
110 | handler calls the user-specified handler associated with the kretprobe, | 112 | handler calls the user-specified return handler associated with the |
111 | then sets the saved instruction pointer to the saved return address, | 113 | kretprobe, then sets the saved instruction pointer to the saved return |
112 | and that's where execution resumes upon return from the trap. | 114 | address, and that's where execution resumes upon return from the trap. |
113 | 115 | ||
114 | While the probed function is executing, its return address is | 116 | While the probed function is executing, its return address is |
115 | stored in an object of type kretprobe_instance. Before calling | 117 | stored in an object of type kretprobe_instance. Before calling |
@@ -131,6 +133,30 @@ zero when the return probe is registered, and is incremented every | |||
131 | time the probed function is entered but there is no kretprobe_instance | 133 | time the probed function is entered but there is no kretprobe_instance |
132 | object available for establishing the return probe. | 134 | object available for establishing the return probe. |
133 | 135 | ||
136 | 1.3.2 Kretprobe entry-handler | ||
137 | |||
138 | Kretprobes also provides an optional user-specified handler which runs | ||
139 | on function entry. This handler is specified by setting the entry_handler | ||
140 | field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the | ||
141 | function entry is hit, the user-defined entry_handler, if any, is invoked. | ||
142 | If the entry_handler returns 0 (success) then a corresponding return handler | ||
143 | is guaranteed to be called upon function return. If the entry_handler | ||
144 | returns a non-zero error then Kprobes leaves the return address as is, and | ||
145 | the kretprobe has no further effect for that particular function instance. | ||
146 | |||
147 | Multiple entry and return handler invocations are matched using the unique | ||
148 | kretprobe_instance object associated with them. Additionally, a user | ||
149 | may also specify per return-instance private data to be part of each | ||
150 | kretprobe_instance object. This is especially useful when sharing private | ||
151 | data between corresponding user entry and return handlers. The size of each | ||
152 | private data object can be specified at kretprobe registration time by | ||
153 | setting the data_size field of the kretprobe struct. This data can be | ||
154 | accessed through the data field of each kretprobe_instance object. | ||
155 | |||
156 | In case the probed function is entered but there is no kretprobe_instance | |
157 | object available, then in addition to incrementing the nmissed count, | ||
158 | the user entry_handler invocation is also skipped. | ||
159 | |||
134 | 2. Architectures Supported | 160 | 2. Architectures Supported |
135 | 161 | ||
136 | Kprobes, jprobes, and return probes are implemented on the following | 162 | Kprobes, jprobes, and return probes are implemented on the following |
@@ -274,6 +300,8 @@ of interest: | |||
274 | - ret_addr: the return address | 300 | - ret_addr: the return address |
275 | - rp: points to the corresponding kretprobe object | 301 | - rp: points to the corresponding kretprobe object |
276 | - task: points to the corresponding task struct | 302 | - task: points to the corresponding task struct |
303 | - data: points to per return-instance private data; see "Kretprobe | ||
304 | entry-handler" for details. | ||
277 | 305 | ||
278 | The regs_return_value(regs) macro provides a simple abstraction to | 306 | The regs_return_value(regs) macro provides a simple abstraction to |
279 | extract the return value from the appropriate register as defined by | 307 | extract the return value from the appropriate register as defined by |
@@ -556,23 +584,52 @@ report failed calls to sys_open(). | |||
556 | #include <linux/kernel.h> | 584 | #include <linux/kernel.h> |
557 | #include <linux/module.h> | 585 | #include <linux/module.h> |
558 | #include <linux/kprobes.h> | 586 | #include <linux/kprobes.h> |
587 | #include <linux/ktime.h> | ||
588 | |||
589 | /* per-instance private data */ | ||
590 | struct my_data { | ||
591 | ktime_t entry_stamp; | ||
592 | }; | ||
559 | 593 | ||
560 | static const char *probed_func = "sys_open"; | 594 | static const char *probed_func = "sys_open"; |
561 | 595 | ||
562 | /* Return-probe handler: If the probed function fails, log the return value. */ | 596 | /* Timestamp function entry. */ |
563 | static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | 597 | static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) |
598 | { | ||
599 | struct my_data *data; | ||
600 | |||
601 | if(!current->mm) | ||
602 | return 1; /* skip kernel threads */ | ||
603 | |||
604 | data = (struct my_data *)ri->data; | ||
605 | data->entry_stamp = ktime_get(); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /* If the probed function failed, log the return value and duration. | ||
610 | * Duration may turn out to be zero consistently, depending upon the | ||
611 | * granularity of time accounting on the platform. */ | ||
612 | static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) | ||
564 | { | 613 | { |
565 | int retval = regs_return_value(regs); | 614 | int retval = regs_return_value(regs); |
615 | struct my_data *data = (struct my_data *)ri->data; | ||
616 | s64 delta; | ||
617 | ktime_t now; | ||
618 | |||
566 | if (retval < 0) { | 619 | if (retval < 0) { |
567 | printk("%s returns %d\n", probed_func, retval); | 620 | now = ktime_get(); |
621 | delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); | ||
622 | printk("%s: return val = %d (duration = %lld ns)\n", | ||
623 | probed_func, retval, delta); | ||
568 | } | 624 | } |
569 | return 0; | 625 | return 0; |
570 | } | 626 | } |
571 | 627 | ||
572 | static struct kretprobe my_kretprobe = { | 628 | static struct kretprobe my_kretprobe = { |
573 | .handler = ret_handler, | 629 | .handler = return_handler, |
574 | /* Probe up to 20 instances concurrently. */ | 630 | .entry_handler = entry_handler, |
575 | .maxactive = 20 | 631 | .data_size = sizeof(struct my_data), |
632 | .maxactive = 20, /* probe up to 20 instances concurrently */ | ||
576 | }; | 633 | }; |
577 | 634 | ||
578 | static int __init kretprobe_init(void) | 635 | static int __init kretprobe_init(void) |
@@ -584,7 +641,7 @@ static int __init kretprobe_init(void) | |||
584 | printk("register_kretprobe failed, returned %d\n", ret); | 641 | printk("register_kretprobe failed, returned %d\n", ret); |
585 | return -1; | 642 | return -1; |
586 | } | 643 | } |
587 | printk("Planted return probe at %p\n", my_kretprobe.kp.addr); | 644 | printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name); |
588 | return 0; | 645 | return 0; |
589 | } | 646 | } |
590 | 647 | ||
@@ -594,7 +651,7 @@ static void __exit kretprobe_exit(void) | |||
594 | printk("kretprobe unregistered\n"); | 651 | printk("kretprobe unregistered\n"); |
595 | /* nmissed > 0 suggests that maxactive was set too low. */ | 652 | /* nmissed > 0 suggests that maxactive was set too low. */ |
596 | printk("Missed probing %d instances of %s\n", | 653 | printk("Missed probing %d instances of %s\n", |
597 | my_kretprobe.nmissed, probed_func); | 654 | my_kretprobe.nmissed, probed_func); |
598 | } | 655 | } |
599 | 656 | ||
600 | module_init(kretprobe_init) | 657 | module_init(kretprobe_init) |
diff --git a/Documentation/kref.txt b/Documentation/kref.txt index f38b59d00c63..130b6e87aa7e 100644 --- a/Documentation/kref.txt +++ b/Documentation/kref.txt | |||
@@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle. Say, for | |||
141 | instance, you have a list of items that are each kref-ed, and you wish | 141 | instance, you have a list of items that are each kref-ed, and you wish |
142 | to get the first one. You can't just pull the first item off the list | 142 | to get the first one. You can't just pull the first item off the list |
143 | and kref_get() it. That violates rule 3 because you are not already | 143 | and kref_get() it. That violates rule 3 because you are not already |
144 | holding a valid pointer. You must add locks or semaphores. For | 144 | holding a valid pointer. You must add a mutex (or some other lock). |
145 | instance: | 145 | For instance: |
146 | 146 | ||
147 | static DECLARE_MUTEX(sem); | 147 | static DEFINE_MUTEX(mutex); |
148 | static LIST_HEAD(q); | 148 | static LIST_HEAD(q); |
149 | struct my_data | 149 | struct my_data |
150 | { | 150 | { |
@@ -155,12 +155,12 @@ struct my_data | |||
155 | static struct my_data *get_entry() | 155 | static struct my_data *get_entry() |
156 | { | 156 | { |
157 | struct my_data *entry = NULL; | 157 | struct my_data *entry = NULL; |
158 | down(&sem); | 158 | mutex_lock(&mutex); |
159 | if (!list_empty(&q)) { | 159 | if (!list_empty(&q)) { |
160 | entry = container_of(q.next, struct my_q_entry, link); | 160 | entry = container_of(q.next, struct my_q_entry, link); |
161 | kref_get(&entry->refcount); | 161 | kref_get(&entry->refcount); |
162 | } | 162 | } |
163 | up(&sem); | 163 | mutex_unlock(&mutex); |
164 | return entry; | 164 | return entry; |
165 | } | 165 | } |
166 | 166 | ||
@@ -174,9 +174,9 @@ static void release_entry(struct kref *ref) | |||
174 | 174 | ||
175 | static void put_entry(struct my_data *entry) | 175 | static void put_entry(struct my_data *entry) |
176 | { | 176 | { |
177 | down(&sem); | 177 | mutex_lock(&mutex); |
178 | kref_put(&entry->refcount, release_entry); | 178 | kref_put(&entry->refcount, release_entry); |
179 | up(&sem); | 179 | mutex_unlock(&mutex); |
180 | } | 180 | } |
181 | 181 | ||
182 | The kref_put() return value is useful if you do not want to hold the | 182 | The kref_put() return value is useful if you do not want to hold the |
@@ -191,13 +191,13 @@ static void release_entry(struct kref *ref) | |||
191 | 191 | ||
192 | static void put_entry(struct my_data *entry) | 192 | static void put_entry(struct my_data *entry) |
193 | { | 193 | { |
194 | down(&sem); | 194 | mutex_lock(&mutex); |
195 | if (kref_put(&entry->refcount, release_entry)) { | 195 | if (kref_put(&entry->refcount, release_entry)) { |
196 | list_del(&entry->link); | 196 | list_del(&entry->link); |
197 | up(&sem); | 197 | mutex_unlock(&mutex); |
198 | kfree(entry); | 198 | kfree(entry); |
199 | } else | 199 | } else |
200 | up(&sem); | 200 | mutex_unlock(&mutex); |
201 | } | 201 | } |
202 | 202 | ||
203 | This is really more useful if you have to call other routines as part | 203 | This is really more useful if you have to call other routines as part |
diff --git a/Documentation/md.txt b/Documentation/md.txt index 5818628207b5..396cdd982c26 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -416,6 +416,16 @@ also have | |||
416 | sectors in total that could need to be processed. The two | 416 | sectors in total that could need to be processed. The two |
417 | numbers are separated by a '/' thus effectively showing one | 417 | numbers are separated by a '/' thus effectively showing one |
418 | value, a fraction of the process that is complete. | 418 | value, a fraction of the process that is complete. |
419 | A 'select' on this attribute will return when resync completes, | ||
420 | when it reaches the current sync_max (below) and possibly at | ||
421 | other times. | ||
422 | |||
423 | sync_max | ||
424 | This is a number of sectors at which point a resync/recovery | ||
425 | process will pause. When a resync is active, the value can | ||
426 | only ever be increased, never decreased. The value of 'max' | ||
427 | effectively disables the limit. | ||
428 | |||
419 | 429 | ||
420 | sync_speed | 430 | sync_speed |
421 | This shows the current actual speed, in K/sec, of the current | 431 | This shows the current actual speed, in K/sec, of the current |
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index e20b19c1b60d..8deffcd68cb8 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt | |||
@@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD. Some common examples: | |||
182 | since the frequency is stored in the irq_freq member of the rtc_device | 182 | since the frequency is stored in the irq_freq member of the rtc_device |
183 | structure. Your driver needs to initialize the irq_freq member during | 183 | structure. Your driver needs to initialize the irq_freq member during |
184 | init. Make sure you check the requested frequency is in range of your | 184 | init. Make sure you check the requested frequency is in range of your |
185 | hardware in the irq_set_freq function. If you cannot actually change | 185 | hardware in the irq_set_freq function. If it isn't, return -EINVAL. If |
186 | the frequency, just return -ENOTTY. | 186 | you cannot actually change the frequency, do not define irq_set_freq. |
187 | 187 | ||
188 | If all else fails, check out the rtc-test.c driver! | 188 | If all else fails, check out the rtc-test.c driver! |
189 | 189 | ||
@@ -268,8 +268,8 @@ int main(int argc, char **argv) | |||
268 | /* This read will block */ | 268 | /* This read will block */ |
269 | retval = read(fd, &data, sizeof(unsigned long)); | 269 | retval = read(fd, &data, sizeof(unsigned long)); |
270 | if (retval == -1) { | 270 | if (retval == -1) { |
271 | perror("read"); | 271 | perror("read"); |
272 | exit(errno); | 272 | exit(errno); |
273 | } | 273 | } |
274 | fprintf(stderr, " %d",i); | 274 | fprintf(stderr, " %d",i); |
275 | fflush(stderr); | 275 | fflush(stderr); |
@@ -326,11 +326,11 @@ test_READ: | |||
326 | rtc_tm.tm_sec %= 60; | 326 | rtc_tm.tm_sec %= 60; |
327 | rtc_tm.tm_min++; | 327 | rtc_tm.tm_min++; |
328 | } | 328 | } |
329 | if (rtc_tm.tm_min == 60) { | 329 | if (rtc_tm.tm_min == 60) { |
330 | rtc_tm.tm_min = 0; | 330 | rtc_tm.tm_min = 0; |
331 | rtc_tm.tm_hour++; | 331 | rtc_tm.tm_hour++; |
332 | } | 332 | } |
333 | if (rtc_tm.tm_hour == 24) | 333 | if (rtc_tm.tm_hour == 24) |
334 | rtc_tm.tm_hour = 0; | 334 | rtc_tm.tm_hour = 0; |
335 | 335 | ||
336 | retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); | 336 | retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); |
@@ -407,8 +407,8 @@ test_PIE: | |||
407 | "\n...Periodic IRQ rate is fixed\n"); | 407 | "\n...Periodic IRQ rate is fixed\n"); |
408 | goto done; | 408 | goto done; |
409 | } | 409 | } |
410 | perror("RTC_IRQP_SET ioctl"); | 410 | perror("RTC_IRQP_SET ioctl"); |
411 | exit(errno); | 411 | exit(errno); |
412 | } | 412 | } |
413 | 413 | ||
414 | fprintf(stderr, "\n%ldHz:\t", tmp); | 414 | fprintf(stderr, "\n%ldHz:\t", tmp); |
@@ -417,27 +417,27 @@ test_PIE: | |||
417 | /* Enable periodic interrupts */ | 417 | /* Enable periodic interrupts */ |
418 | retval = ioctl(fd, RTC_PIE_ON, 0); | 418 | retval = ioctl(fd, RTC_PIE_ON, 0); |
419 | if (retval == -1) { | 419 | if (retval == -1) { |
420 | perror("RTC_PIE_ON ioctl"); | 420 | perror("RTC_PIE_ON ioctl"); |
421 | exit(errno); | 421 | exit(errno); |
422 | } | 422 | } |
423 | 423 | ||
424 | for (i=1; i<21; i++) { | 424 | for (i=1; i<21; i++) { |
425 | /* This blocks */ | 425 | /* This blocks */ |
426 | retval = read(fd, &data, sizeof(unsigned long)); | 426 | retval = read(fd, &data, sizeof(unsigned long)); |
427 | if (retval == -1) { | 427 | if (retval == -1) { |
428 | perror("read"); | 428 | perror("read"); |
429 | exit(errno); | 429 | exit(errno); |
430 | } | 430 | } |
431 | fprintf(stderr, " %d",i); | 431 | fprintf(stderr, " %d",i); |
432 | fflush(stderr); | 432 | fflush(stderr); |
433 | irqcount++; | 433 | irqcount++; |
434 | } | 434 | } |
435 | 435 | ||
436 | /* Disable periodic interrupts */ | 436 | /* Disable periodic interrupts */ |
437 | retval = ioctl(fd, RTC_PIE_OFF, 0); | 437 | retval = ioctl(fd, RTC_PIE_OFF, 0); |
438 | if (retval == -1) { | 438 | if (retval == -1) { |
439 | perror("RTC_PIE_OFF ioctl"); | 439 | perror("RTC_PIE_OFF ioctl"); |
440 | exit(errno); | 440 | exit(errno); |
441 | } | 441 | } |
442 | } | 442 | } |
443 | 443 | ||
diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX new file mode 100644 index 000000000000..b5f5ca069b2d --- /dev/null +++ b/Documentation/scheduler/00-INDEX | |||
@@ -0,0 +1,16 @@ | |||
1 | 00-INDEX | ||
2 | - this file. | ||
3 | sched-arch.txt | ||
4 | - CPU Scheduler implementation hints for architecture specific code. | ||
5 | sched-coding.txt | ||
6 | - reference for various scheduler-related methods in the O(1) scheduler. | ||
7 | sched-design.txt | ||
8 | - goals, design and implementation of the Linux O(1) scheduler. | ||
9 | sched-design-CFS.txt | ||
10 | - goals, design and implementation of the Completely Fair Scheduler. | ||
11 | sched-domains.txt | ||
12 | - information on scheduling domains. | ||
13 | sched-nice-design.txt | ||
14 | - How and why the scheduler's nice levels are implemented. | ||
15 | sched-stats.txt | ||
16 | - information on schedstats (Linux Scheduler Statistics). | ||
diff --git a/Documentation/sched-arch.txt b/Documentation/scheduler/sched-arch.txt index 941615a9769b..941615a9769b 100644 --- a/Documentation/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.txt | |||
diff --git a/Documentation/sched-coding.txt b/Documentation/scheduler/sched-coding.txt index cbd8db752acf..cbd8db752acf 100644 --- a/Documentation/sched-coding.txt +++ b/Documentation/scheduler/sched-coding.txt | |||
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 88bcb8767335..88bcb8767335 100644 --- a/Documentation/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt | |||
diff --git a/Documentation/sched-design.txt b/Documentation/scheduler/sched-design.txt index 1605bf0cba8b..1605bf0cba8b 100644 --- a/Documentation/sched-design.txt +++ b/Documentation/scheduler/sched-design.txt | |||
diff --git a/Documentation/sched-domains.txt b/Documentation/scheduler/sched-domains.txt index a9e990ab980f..a9e990ab980f 100644 --- a/Documentation/sched-domains.txt +++ b/Documentation/scheduler/sched-domains.txt | |||
diff --git a/Documentation/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt index e2bae5a577e3..e2bae5a577e3 100644 --- a/Documentation/sched-nice-design.txt +++ b/Documentation/scheduler/sched-nice-design.txt | |||
diff --git a/Documentation/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 442e14d35dea..442e14d35dea 100644 --- a/Documentation/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt | |||
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index aa986a35e994..f99254327ae5 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: | |||
23 | - inode-max | 23 | - inode-max |
24 | - inode-nr | 24 | - inode-nr |
25 | - inode-state | 25 | - inode-state |
26 | - nr_open | ||
26 | - overflowuid | 27 | - overflowuid |
27 | - overflowgid | 28 | - overflowgid |
28 | - suid_dumpable | 29 | - suid_dumpable |
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. | |||
91 | 92 | ||
92 | ============================================================== | 93 | ============================================================== |
93 | 94 | ||
95 | nr_open: | ||
96 | |||
97 | This denotes the maximum number of file-handles a process can | ||
98 | allocate. Default value is 1024*1024 (1048576) which should be | ||
99 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
100 | resource limit. | ||
101 | |||
102 | ============================================================== | ||
103 | |||
94 | inode-max, inode-nr & inode-state: | 104 | inode-max, inode-nr & inode-state: |
95 | 105 | ||
96 | As with file handles, the kernel allocates the inode structures | 106 | As with file handles, the kernel allocates the inode structures |
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 24eac1bc735d..8a4863c4edd4 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm: | |||
32 | - min_unmapped_ratio | 32 | - min_unmapped_ratio |
33 | - min_slab_ratio | 33 | - min_slab_ratio |
34 | - panic_on_oom | 34 | - panic_on_oom |
35 | - oom_dump_tasks | ||
35 | - oom_kill_allocating_task | 36 | - oom_kill_allocating_task |
36 | - mmap_min_address | 37 | - mmap_min_address |
37 | - numa_zonelist_order | 38 | - numa_zonelist_order |
@@ -232,6 +233,27 @@ according to your policy of failover. | |||
232 | 233 | ||
233 | ============================================================= | 234 | ============================================================= |
234 | 235 | ||
236 | oom_dump_tasks | ||
237 | |||
238 | Enables a system-wide task dump (excluding kernel threads) to be | ||
239 | produced when the kernel performs an OOM-killing and includes such | ||
240 | information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and | ||
241 | name. This is helpful to determine why the OOM killer was invoked | ||
242 | and to identify the rogue task that caused it. | ||
243 | |||
244 | If this is set to zero, this information is suppressed. On very | ||
245 | large systems with thousands of tasks it may not be feasible to dump | ||
246 | the memory state information for each one. Such systems should not | ||
247 | be forced to incur a performance penalty in OOM conditions when the | ||
248 | information may not be desired. | ||
249 | |||
250 | If this is set to non-zero, this information is shown whenever the | ||
251 | OOM killer actually kills a memory-hogging task. | ||
252 | |||
253 | The default value is 0. | ||
254 | |||
255 | ============================================================= | ||
256 | |||
235 | oom_kill_allocating_task | 257 | oom_kill_allocating_task |
236 | 258 | ||
237 | This enables or disables killing the OOM-triggering task in | 259 | This enables or disables killing the OOM-triggering task in |
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt new file mode 100644 index 000000000000..6223eace3c09 --- /dev/null +++ b/Documentation/unaligned-memory-access.txt | |||
@@ -0,0 +1,226 @@ | |||
1 | UNALIGNED MEMORY ACCESSES | ||
2 | ========================= | ||
3 | |||
4 | Linux runs on a wide variety of architectures which have varying behaviour | ||
5 | when it comes to memory access. This document presents some details about | ||
6 | unaligned accesses, why you need to write code that doesn't cause them, | ||
7 | and how to write such code! | ||
8 | |||
9 | |||
10 | The definition of an unaligned access | ||
11 | ===================================== | ||
12 | |||
13 | Unaligned memory accesses occur when you try to read N bytes of data starting | ||
14 | from an address that is not evenly divisible by N (i.e. addr % N != 0). | ||
15 | For example, reading 4 bytes of data from address 0x10004 is fine, but | ||
16 | reading 4 bytes of data from address 0x10005 would be an unaligned memory | ||
17 | access. | ||
18 | |||
19 | The above may seem a little vague, as memory access can happen in different | ||
20 | ways. The context here is at the machine code level: certain instructions read | ||
21 | or write a number of bytes to or from memory (e.g. movb, movw, movl in x86 | ||
22 | assembly). As will become clear, it is relatively easy to spot C statements | ||
23 | which will compile to multiple-byte memory access instructions, namely when | ||
24 | dealing with types such as u16, u32 and u64. | ||
25 | |||
26 | |||
27 | Natural alignment | ||
28 | ================= | ||
29 | |||
30 | The rule mentioned above forms what we refer to as natural alignment: | ||
31 | When accessing N bytes of memory, the base memory address must be evenly | ||
32 | divisible by N, i.e. addr % N == 0. | ||
33 | |||
34 | When writing code, assume the target architecture has natural alignment | ||
35 | requirements. | ||
36 | |||
37 | In reality, only a few architectures require natural alignment on all sizes | ||
38 | of memory access. However, we must consider ALL supported architectures; | ||
39 | writing code that satisfies natural alignment requirements is the easiest way | ||
40 | to achieve full portability. | ||
41 | |||
42 | |||
43 | Why unaligned access is bad | ||
44 | =========================== | ||
45 | |||
46 | The effects of performing an unaligned memory access vary from architecture | ||
47 | to architecture. It would be easy to write a whole document on the differences | ||
48 | here; a summary of the common scenarios is presented below: | ||
49 | |||
50 | - Some architectures are able to perform unaligned memory accesses | ||
51 | transparently, but there is usually a significant performance cost. | ||
52 | - Some architectures raise processor exceptions when unaligned accesses | ||
53 | happen. The exception handler is able to correct the unaligned access, | ||
54 | at significant cost to performance. | ||
55 | - Some architectures raise processor exceptions when unaligned accesses | ||
56 | happen, but the exceptions do not contain enough information for the | ||
57 | unaligned access to be corrected. | ||
58 | - Some architectures are not capable of unaligned memory access, but will | ||
59 | silently perform a different memory access to the one that was requested, | ||
60 | resulting in a subtle code bug that is hard to detect! | ||
61 | |||
62 | It should be obvious from the above that if your code causes unaligned | ||
63 | memory accesses to happen, your code will not work correctly on certain | ||
64 | platforms and will cause performance problems on others. | ||
65 | |||
66 | |||
67 | Code that does not cause unaligned access | ||
68 | ========================================= | ||
69 | |||
70 | At first, the concepts above may seem a little hard to relate to actual | ||
71 | coding practice. After all, you don't have a great deal of control over | ||
72 | memory addresses of certain variables, etc. | ||
73 | |||
74 | Fortunately things are not too complex, as in most cases, the compiler | ||
75 | ensures that things will work for you. For example, take the following | ||
76 | structure: | ||
77 | |||
78 | struct foo { | ||
79 | u16 field1; | ||
80 | u32 field2; | ||
81 | u8 field3; | ||
82 | }; | ||
83 | |||
84 | Let us assume that an instance of the above structure resides in memory | ||
85 | starting at address 0x10000. With a basic level of understanding, it would | ||
86 | not be unreasonable to expect that accessing field2 would cause an unaligned | ||
87 | access. You'd be expecting field2 to be located at offset 2 bytes into the | ||
88 | structure, i.e. address 0x10002, but that address is not evenly divisible | ||
89 | by 4 (remember, we're reading a 4 byte value here). | ||
90 | |||
91 | Fortunately, the compiler understands the alignment constraints, so in the | ||
92 | above case it would insert 2 bytes of padding in between field1 and field2. | ||
93 | Therefore, for standard structure types you can always rely on the compiler | ||
94 | to pad structures so that accesses to fields are suitably aligned (assuming | ||
95 | you do not cast the field to a type of different length). | ||
96 | |||
97 | Similarly, you can also rely on the compiler to align variables and function | ||
98 | parameters to a naturally aligned scheme, based on the size of the type of | ||
99 | the variable. | ||
100 | |||
101 | At this point, it should be clear that accessing a single byte (u8 or char) | ||
102 | will never cause an unaligned access, because all memory addresses are evenly | ||
103 | divisible by one. | ||
104 | |||
105 | On a related topic, with the above considerations in mind you may observe | ||
106 | that you could reorder the fields in the structure in order to place fields | ||
107 | where padding would otherwise be inserted, and hence reduce the overall | ||
108 | resident memory size of structure instances. The optimal layout of the | ||
109 | above example is: | ||
110 | |||
111 | struct foo { | ||
112 | u32 field2; | ||
113 | u16 field1; | ||
114 | u8 field3; | ||
115 | }; | ||
116 | |||
117 | For a natural alignment scheme, the compiler would only have to add a single | ||
118 | byte of padding at the end of the structure. This padding is added in order | ||
119 | to satisfy alignment constraints for arrays of these structures. | ||
120 | |||
121 | Another point worth mentioning is the use of __attribute__((packed)) on a | ||
122 | structure type. This GCC-specific attribute tells the compiler never to | ||
123 | insert any padding within structures, useful when you want to use a C struct | ||
124 | to represent some data that comes in a fixed arrangement 'off the wire'. | ||
125 | |||
126 | You might be inclined to believe that usage of this attribute can easily | ||
127 | lead to unaligned accesses when accessing fields that do not satisfy | ||
128 | architectural alignment requirements. However, again, the compiler is aware | ||
129 | of the alignment constraints and will generate extra instructions to perform | ||
130 | the memory access in a way that does not cause unaligned access. Of course, | ||
131 | the extra instructions obviously cause a loss in performance compared to the | ||
132 | non-packed case, so the packed attribute should only be used when avoiding | ||
133 | structure padding is of importance. | ||
134 | |||
135 | |||
136 | Code that causes unaligned access | ||
137 | ================================= | ||
138 | |||
139 | With the above in mind, let's move on to a real-life example of a function | ||
140 | that can cause an unaligned memory access. The following function adapted | ||
141 | from include/linux/etherdevice.h is an optimized routine to compare two | ||
142 | ethernet MAC addresses for equality. | ||
143 | |||
144 | unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) | ||
145 | { | ||
146 | const u16 *a = (const u16 *) addr1; | ||
147 | const u16 *b = (const u16 *) addr2; | ||
148 | return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; | ||
149 | } | ||
150 | |||
151 | In the above function, the reference to a[0] causes 2 bytes (16 bits) to | ||
152 | be read from memory starting at address addr1. Think about what would happen | ||
153 | if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned | ||
154 | access.) | ||
155 | |||
156 | Despite the potential unaligned access problems with the above function, it | ||
157 | is included in the kernel anyway but is understood to only work on | ||
158 | 16-bit-aligned addresses. It is up to the caller to ensure this alignment or | ||
159 | not use this function at all. This alignment-unsafe function is still useful | ||
160 | as it is a decent optimization for the cases when you can ensure alignment, | ||
161 | which is true almost all of the time in ethernet networking context. | ||
162 | |||
163 | |||
164 | Here is another example of some code that could cause unaligned accesses: | ||
165 | void myfunc(u8 *data, u32 value) | ||
166 | { | ||
167 | [...] | ||
168 | *((u32 *) data) = cpu_to_le32(value); | ||
169 | [...] | ||
170 | } | ||
171 | |||
172 | This code will cause unaligned accesses every time the data parameter points | ||
173 | to an address that is not evenly divisible by 4. | ||
174 | |||
175 | In summary, the 2 main scenarios where you may run into unaligned access | ||
176 | problems involve: | ||
177 | 1. Casting variables to types of different lengths | ||
178 | 2. Pointer arithmetic followed by access to at least 2 bytes of data | ||
179 | |||
180 | |||
181 | Avoiding unaligned accesses | ||
182 | =========================== | ||
183 | |||
184 | The easiest way to avoid unaligned access is to use the get_unaligned() and | ||
185 | put_unaligned() macros provided by the <asm/unaligned.h> header file. | ||
186 | |||
187 | Going back to an earlier example of code that potentially causes unaligned | ||
188 | access: | ||
189 | |||
190 | void myfunc(u8 *data, u32 value) | ||
191 | { | ||
192 | [...] | ||
193 | *((u32 *) data) = cpu_to_le32(value); | ||
194 | [...] | ||
195 | } | ||
196 | |||
197 | To avoid the unaligned memory access, you would rewrite it as follows: | ||
198 | |||
199 | void myfunc(u8 *data, u32 value) | ||
200 | { | ||
201 | [...] | ||
202 | value = cpu_to_le32(value); | ||
203 | put_unaligned(value, (u32 *) data); | ||
204 | [...] | ||
205 | } | ||
206 | |||
207 | The get_unaligned() macro works similarly. Assuming 'data' is a pointer to | ||
208 | memory and you wish to avoid unaligned access, its usage is as follows: | ||
209 | |||
210 | u32 value = get_unaligned((u32 *) data); | ||
211 | |||
212 | These macros work for memory accesses of any length (not just 32 bits as | ||
213 | in the examples above). Be aware that when compared to standard access of | ||
214 | aligned memory, using these macros to access unaligned memory can be costly in | ||
215 | terms of performance. | ||
216 | |||
217 | If use of such macros is not convenient, another option is to use memcpy(), | ||
218 | where the source or destination (or both) are of type u8* or unsigned char*. | ||
219 | Due to the byte-wise nature of this operation, unaligned accesses are avoided. | ||
220 | |||
221 | -- | ||
222 | Author: Daniel Drake <dsd@gentoo.org> | ||
223 | With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt, | ||
224 | Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, | ||
225 | Uli Kunitz, Vadim Lobanov | ||
226 | |||
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX index 752613c4cea2..7b0ceaaad7af 100644 --- a/Documentation/w1/masters/00-INDEX +++ b/Documentation/w1/masters/00-INDEX | |||
@@ -4,3 +4,5 @@ ds2482 | |||
4 | - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. | 4 | - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. |
5 | ds2490 | 5 | ds2490 |
6 | - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. | 6 | - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. |
7 | w1-gpio | ||
8 | - GPIO 1-wire bus master driver. | ||
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio new file mode 100644 index 000000000000..af5d3b4aa851 --- /dev/null +++ b/Documentation/w1/masters/w1-gpio | |||
@@ -0,0 +1,33 @@ | |||
1 | Kernel driver w1-gpio | ||
2 | ===================== | ||
3 | |||
4 | Author: Ville Syrjala <syrjala@sci.fi> | ||
5 | |||
6 | |||
7 | Description | ||
8 | ----------- | ||
9 | |||
10 | GPIO 1-wire bus master driver. The driver uses the GPIO API to control the | ||
11 | wire and the GPIO pin can be specified using platform data. | ||
12 | |||
13 | |||
14 | Example (mach-at91) | ||
15 | ------------------- | ||
16 | |||
17 | #include <linux/w1-gpio.h> | ||
18 | |||
19 | static struct w1_gpio_platform_data foo_w1_gpio_pdata = { | ||
20 | .pin = AT91_PIN_PB20, | ||
21 | .is_open_drain = 1, | ||
22 | }; | ||
23 | |||
24 | static struct platform_device foo_w1_device = { | ||
25 | .name = "w1-gpio", | ||
26 | .id = -1, | ||
27 | .dev.platform_data = &foo_w1_gpio_pdata, | ||
28 | }; | ||
29 | |||
30 | ... | ||
31 | at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1); | ||
32 | at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1); | ||
33 | platform_device_register(&foo_w1_device); | ||