diff options
Diffstat (limited to 'Documentation/filesystems')
24 files changed, 286 insertions, 74 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index d362aa543b27..5139b8c9d5af 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -1,7 +1,5 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file (info on some of the filesystems supported by linux). | 2 | - this file (info on some of the filesystems supported by linux). |
3 | Exporting | ||
4 | - explanation of how to make filesystems exportable. | ||
5 | Locking | 3 | Locking |
6 | - info on locking rules as they pertain to Linux VFS. | 4 | - info on locking rules as they pertain to Linux VFS. |
7 | 9p.txt | 5 | 9p.txt |
@@ -36,6 +34,8 @@ dnotify.txt | |||
36 | - info about directory notification in Linux. | 34 | - info about directory notification in Linux. |
37 | ecryptfs.txt | 35 | ecryptfs.txt |
38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. | 36 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
37 | exofs.txt | ||
38 | - info, usage, mount options, design about EXOFS. | ||
39 | ext2.txt | 39 | ext2.txt |
40 | - info, mount options and specifications for the Ext2 filesystem. | 40 | - info, mount options and specifications for the Ext2 filesystem. |
41 | ext3.txt | 41 | ext3.txt |
@@ -68,12 +68,8 @@ mandatory-locking.txt | |||
68 | - info on the Linux implementation of Sys V mandatory file locking. | 68 | - info on the Linux implementation of Sys V mandatory file locking. |
69 | ncpfs.txt | 69 | ncpfs.txt |
70 | - info on Novell Netware(tm) filesystem using NCP protocol. | 70 | - info on Novell Netware(tm) filesystem using NCP protocol. |
71 | nfs41-server.txt | 71 | nfs/ |
72 | - info on the Linux server implementation of NFSv4 minor version 1. | 72 | - nfs-related documentation. |
73 | nfs-rdma.txt | ||
74 | - how to install and setup the Linux NFS/RDMA client and server software. | ||
75 | nfsroot.txt | ||
76 | - short guide on setting up a diskless box with NFS root filesystem. | ||
77 | nilfs2.txt | 73 | nilfs2.txt |
78 | - info and mount options for the NILFS2 filesystem. | 74 | - info and mount options for the NILFS2 filesystem. |
79 | ntfs.txt | 75 | ntfs.txt |
@@ -92,8 +88,6 @@ relay.txt | |||
92 | - info on relay, for efficient streaming from kernel to user space. | 88 | - info on relay, for efficient streaming from kernel to user space. |
93 | romfs.txt | 89 | romfs.txt |
94 | - description of the ROMFS filesystem. | 90 | - description of the ROMFS filesystem. |
95 | rpc-cache.txt | ||
96 | - introduction to the caching mechanisms in the sunrpc layer. | ||
97 | seq_file.txt | 91 | seq_file.txt |
98 | - how to use the seq_file API | 92 | - how to use the seq_file API |
99 | sharedsubtree.txt | 93 | sharedsubtree.txt |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 18b9d0ca0630..06bbbed71206 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -460,13 +460,6 @@ in sys_read() and friends. | |||
460 | 460 | ||
461 | --------------------------- dquot_operations ------------------------------- | 461 | --------------------------- dquot_operations ------------------------------- |
462 | prototypes: | 462 | prototypes: |
463 | int (*initialize) (struct inode *, int); | ||
464 | int (*drop) (struct inode *); | ||
465 | int (*alloc_space) (struct inode *, qsize_t, int); | ||
466 | int (*alloc_inode) (const struct inode *, unsigned long); | ||
467 | int (*free_space) (struct inode *, qsize_t); | ||
468 | int (*free_inode) (const struct inode *, unsigned long); | ||
469 | int (*transfer) (struct inode *, struct iattr *); | ||
470 | int (*write_dquot) (struct dquot *); | 463 | int (*write_dquot) (struct dquot *); |
471 | int (*acquire_dquot) (struct dquot *); | 464 | int (*acquire_dquot) (struct dquot *); |
472 | int (*release_dquot) (struct dquot *); | 465 | int (*release_dquot) (struct dquot *); |
@@ -479,13 +472,6 @@ a proper locking wrt the filesystem and call the generic quota operations. | |||
479 | What filesystem should expect from the generic quota functions: | 472 | What filesystem should expect from the generic quota functions: |
480 | 473 | ||
481 | FS recursion Held locks when called | 474 | FS recursion Held locks when called |
482 | initialize: yes maybe dqonoff_sem | ||
483 | drop: yes - | ||
484 | alloc_space: ->mark_dirty() - | ||
485 | alloc_inode: ->mark_dirty() - | ||
486 | free_space: ->mark_dirty() - | ||
487 | free_inode: ->mark_dirty() - | ||
488 | transfer: yes - | ||
489 | write_dquot: yes dqonoff_sem or dqptr_sem | 475 | write_dquot: yes dqonoff_sem or dqptr_sem |
490 | acquire_dquot: yes dqonoff_sem or dqptr_sem | 476 | acquire_dquot: yes dqonoff_sem or dqptr_sem |
491 | release_dquot: yes dqonoff_sem or dqptr_sem | 477 | release_dquot: yes dqonoff_sem or dqptr_sem |
@@ -495,10 +481,6 @@ write_info: yes dqonoff_sem | |||
495 | FS recursion means calling ->quota_read() and ->quota_write() from superblock | 481 | FS recursion means calling ->quota_read() and ->quota_write() from superblock |
496 | operations. | 482 | operations. |
497 | 483 | ||
498 | ->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called | ||
499 | only directly by the filesystem and do not call any fs functions only | ||
500 | the ->mark_dirty() operation. | ||
501 | |||
502 | More details about quota locking can be found in fs/dquot.c. | 484 | More details about quota locking can be found in fs/dquot.c. |
503 | 485 | ||
504 | --------------------------- vm_operations_struct ----------------------------- | 486 | --------------------------- vm_operations_struct ----------------------------- |
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b9491d89..a91e2e2095b0 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -235,6 +235,7 @@ proc files. | |||
235 | neg=N Number of negative lookups made | 235 | neg=N Number of negative lookups made |
236 | pos=N Number of positive lookups made | 236 | pos=N Number of positive lookups made |
237 | crt=N Number of objects created by lookup | 237 | crt=N Number of objects created by lookup |
238 | tmo=N Number of lookups timed out and requeued | ||
238 | Updates n=N Number of update cookie requests seen | 239 | Updates n=N Number of update cookie requests seen |
239 | nul=N Number of upd reqs given a NULL parent | 240 | nul=N Number of upd reqs given a NULL parent |
240 | run=N Number of upd reqs granted CPU time | 241 | run=N Number of upd reqs granted CPU time |
@@ -250,8 +251,10 @@ proc files. | |||
250 | ok=N Number of successful alloc reqs | 251 | ok=N Number of successful alloc reqs |
251 | wt=N Number of alloc reqs that waited on lookup completion | 252 | wt=N Number of alloc reqs that waited on lookup completion |
252 | nbf=N Number of alloc reqs rejected -ENOBUFS | 253 | nbf=N Number of alloc reqs rejected -ENOBUFS |
254 | int=N Number of alloc reqs aborted -ERESTARTSYS | ||
253 | ops=N Number of alloc reqs submitted | 255 | ops=N Number of alloc reqs submitted |
254 | owt=N Number of alloc reqs waited for CPU time | 256 | owt=N Number of alloc reqs waited for CPU time |
257 | abt=N Number of alloc reqs aborted due to object death | ||
255 | Retrvls n=N Number of retrieval (read) requests seen | 258 | Retrvls n=N Number of retrieval (read) requests seen |
256 | ok=N Number of successful retr reqs | 259 | ok=N Number of successful retr reqs |
257 | wt=N Number of retr reqs that waited on lookup completion | 260 | wt=N Number of retr reqs that waited on lookup completion |
@@ -261,6 +264,7 @@ proc files. | |||
261 | oom=N Number of retr reqs failed -ENOMEM | 264 | oom=N Number of retr reqs failed -ENOMEM |
262 | ops=N Number of retr reqs submitted | 265 | ops=N Number of retr reqs submitted |
263 | owt=N Number of retr reqs waited for CPU time | 266 | owt=N Number of retr reqs waited for CPU time |
267 | abt=N Number of retr reqs aborted due to object death | ||
264 | Stores n=N Number of storage (write) requests seen | 268 | Stores n=N Number of storage (write) requests seen |
265 | ok=N Number of successful store reqs | 269 | ok=N Number of successful store reqs |
266 | agn=N Number of store reqs on a page already pending storage | 270 | agn=N Number of store reqs on a page already pending storage |
@@ -268,12 +272,37 @@ proc files. | |||
268 | oom=N Number of store reqs failed -ENOMEM | 272 | oom=N Number of store reqs failed -ENOMEM |
269 | ops=N Number of store reqs submitted | 273 | ops=N Number of store reqs submitted |
270 | run=N Number of store reqs granted CPU time | 274 | run=N Number of store reqs granted CPU time |
275 | pgs=N Number of pages given store req processing time | ||
276 | rxd=N Number of store reqs deleted from tracking tree | ||
277 | olm=N Number of store reqs over store limit | ||
278 | VmScan nos=N Number of release reqs against pages with no pending store | ||
279 | gon=N Number of release reqs against pages stored by time lock granted | ||
280 | bsy=N Number of release reqs ignored due to in-progress store | ||
281 | can=N Number of page stores cancelled due to release req | ||
271 | Ops pend=N Number of times async ops added to pending queues | 282 | Ops pend=N Number of times async ops added to pending queues |
272 | run=N Number of times async ops given CPU time | 283 | run=N Number of times async ops given CPU time |
273 | enq=N Number of times async ops queued for processing | 284 | enq=N Number of times async ops queued for processing |
285 | can=N Number of async ops cancelled | ||
286 | rej=N Number of async ops rejected due to object lookup/create failure | ||
274 | dfr=N Number of async ops queued for deferred release | 287 | dfr=N Number of async ops queued for deferred release |
275 | rel=N Number of async ops released | 288 | rel=N Number of async ops released |
276 | gc=N Number of deferred-release async ops garbage collected | 289 | gc=N Number of deferred-release async ops garbage collected |
290 | CacheOp alo=N Number of in-progress alloc_object() cache ops | ||
291 | luo=N Number of in-progress lookup_object() cache ops | ||
292 | luc=N Number of in-progress lookup_complete() cache ops | ||
293 | gro=N Number of in-progress grab_object() cache ops | ||
294 | upo=N Number of in-progress update_object() cache ops | ||
295 | dro=N Number of in-progress drop_object() cache ops | ||
296 | pto=N Number of in-progress put_object() cache ops | ||
297 | syn=N Number of in-progress sync_cache() cache ops | ||
298 | atc=N Number of in-progress attr_changed() cache ops | ||
299 | rap=N Number of in-progress read_or_alloc_page() cache ops | ||
300 | ras=N Number of in-progress read_or_alloc_pages() cache ops | ||
301 | alp=N Number of in-progress allocate_page() cache ops | ||
302 | als=N Number of in-progress allocate_pages() cache ops | ||
303 | wrp=N Number of in-progress write_page() cache ops | ||
304 | ucp=N Number of in-progress uncache_page() cache ops | ||
305 | dsp=N Number of in-progress dissociate_pages() cache ops | ||
277 | 306 | ||
278 | 307 | ||
279 | (*) /proc/fs/fscache/histogram | 308 | (*) /proc/fs/fscache/histogram |
@@ -299,6 +328,87 @@ proc files. | |||
299 | jiffy range covered, and the SECS field the equivalent number of seconds. | 328 | jiffy range covered, and the SECS field the equivalent number of seconds. |
300 | 329 | ||
301 | 330 | ||
331 | =========== | ||
332 | OBJECT LIST | ||
333 | =========== | ||
334 | |||
335 | If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a | ||
336 | list of all the objects currently allocated and allow them to be viewed | ||
337 | through: | ||
338 | |||
339 | /proc/fs/fscache/objects | ||
340 | |||
341 | This will look something like: | ||
342 | |||
343 | [root@andromeda ~]# head /proc/fs/fscache/objects | ||
344 | OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA | ||
345 | ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ | ||
346 | 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a | ||
347 | 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a | ||
348 | |||
349 | where the first set of columns before the '|' describe the object: | ||
350 | |||
351 | COLUMN DESCRIPTION | ||
352 | ======= =============================================================== | ||
353 | OBJECT Object debugging ID (appears as OBJ%x in some debug messages) | ||
354 | PARENT Debugging ID of parent object | ||
355 | STAT Object state | ||
356 | CHLDN Number of child objects of this object | ||
357 | OPS Number of outstanding operations on this object | ||
358 | OOP Number of outstanding child object management operations | ||
359 | IPR | ||
360 | EX Number of outstanding exclusive operations | ||
361 | READS Number of outstanding read operations | ||
362 | EM Object's event mask | ||
363 | EV Events raised on this object | ||
364 | F Object flags | ||
365 | S Object slow-work work item flags | ||
366 | |||
367 | and the second set of columns describe the object's cookie, if present: | ||
368 | |||
369 | COLUMN DESCRIPTION | ||
370 | =============== ======================================================= | ||
371 | NETFS_COOKIE_DEF Name of netfs cookie definition | ||
372 | TY Cookie type (IX - index, DT - data, hex - special) | ||
373 | FL Cookie flags | ||
374 | NETFS_DATA Netfs private data stored in the cookie | ||
375 | OBJECT_KEY Object key } 1 column, with separating comma | ||
376 | AUX_DATA Object aux data } presence may be configured | ||
377 | |||
378 | The data shown may be filtered by attaching the a key to an appropriate keyring | ||
379 | before viewing the file. Something like: | ||
380 | |||
381 | keyctl add user fscache:objlist <restrictions> @s | ||
382 | |||
383 | where <restrictions> are a selection of the following letters: | ||
384 | |||
385 | K Show hexdump of object key (don't show if not given) | ||
386 | A Show hexdump of object aux data (don't show if not given) | ||
387 | |||
388 | and the following paired letters: | ||
389 | |||
390 | C Show objects that have a cookie | ||
391 | c Show objects that don't have a cookie | ||
392 | B Show objects that are busy | ||
393 | b Show objects that aren't busy | ||
394 | W Show objects that have pending writes | ||
395 | w Show objects that don't have pending writes | ||
396 | R Show objects that have outstanding reads | ||
397 | r Show objects that don't have outstanding reads | ||
398 | S Show objects that have slow work queued | ||
399 | s Show objects that don't have slow work queued | ||
400 | |||
401 | If neither side of a letter pair is given, then both are implied. For example: | ||
402 | |||
403 | keyctl add user fscache:objlist KB @s | ||
404 | |||
405 | shows objects that are busy, and lists their object keys, but does not dump | ||
406 | their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is | ||
407 | not implied. | ||
408 | |||
409 | By default all objects and all fields will be shown. | ||
410 | |||
411 | |||
302 | ========= | 412 | ========= |
303 | DEBUGGING | 413 | DEBUGGING |
304 | ========= | 414 | ========= |
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1ed5e9e..1902c57b72ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). | |||
641 | 641 | ||
642 | Furthermore, note that this does not cancel the asynchronous read or write | 642 | Furthermore, note that this does not cancel the asynchronous read or write |
643 | operation started by the read/alloc and write functions, so the page | 643 | operation started by the read/alloc and write functions, so the page |
644 | invalidation and release functions must use: | 644 | invalidation functions must use: |
645 | 645 | ||
646 | bool fscache_check_page_write(struct fscache_cookie *cookie, | 646 | bool fscache_check_page_write(struct fscache_cookie *cookie, |
647 | struct page *page); | 647 | struct page *page); |
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: | |||
654 | to wait for it to finish if it is. | 654 | to wait for it to finish if it is. |
655 | 655 | ||
656 | 656 | ||
657 | When releasepage() is being implemented, a special FS-Cache function exists to | ||
658 | manage the heuristics of coping with vmscan trying to eject pages, which may | ||
659 | conflict with the cache trying to write pages to the cache (which may itself | ||
660 | need to allocate memory): | ||
661 | |||
662 | bool fscache_maybe_release_page(struct fscache_cookie *cookie, | ||
663 | struct page *page, | ||
664 | gfp_t gfp); | ||
665 | |||
666 | This takes the netfs cookie, and the page and gfp arguments as supplied to | ||
667 | releasepage(). It will return false if the page cannot be released yet for | ||
668 | some reason and if it returns true, the page has been uncached and can now be | ||
669 | released. | ||
670 | |||
671 | To make a page available for release, this function may wait for an outstanding | ||
672 | storage request to complete, or it may attempt to cancel the storage request - | ||
673 | in which case the page will not be stored in the cache this time. | ||
674 | |||
675 | |||
657 | ========================== | 676 | ========================== |
658 | INDEX AND DATA FILE UPDATE | 677 | INDEX AND DATA FILE UPDATE |
659 | ========================== | 678 | ========================== |
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt index 4c0c575a4012..79334ed5daa7 100644 --- a/Documentation/filesystems/dentry-locking.txt +++ b/Documentation/filesystems/dentry-locking.txt | |||
@@ -62,7 +62,8 @@ changes are : | |||
62 | 2. Insertion of a dentry into the hash table is done using | 62 | 2. Insertion of a dentry into the hash table is done using |
63 | hlist_add_head_rcu() which take care of ordering the writes - the | 63 | hlist_add_head_rcu() which take care of ordering the writes - the |
64 | writes to the dentry must be visible before the dentry is | 64 | writes to the dentry must be visible before the dentry is |
65 | inserted. This works in conjunction with hlist_for_each_rcu() while | 65 | inserted. This works in conjunction with hlist_for_each_rcu(), |
66 | which has since been replaced by hlist_for_each_entry_rcu(), while | ||
66 | walking the hash chain. The only requirement is that all | 67 | walking the hash chain. The only requirement is that all |
67 | initialization to the dentry must be done before | 68 | initialization to the dentry must be done before |
68 | hlist_add_head_rcu() since we don't have dcache_lock protection | 69 | hlist_add_head_rcu() since we don't have dcache_lock protection |
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt index 0ced74c2f73c..abd2a9b5b787 100644 --- a/Documentation/filesystems/exofs.txt +++ b/Documentation/filesystems/exofs.txt | |||
@@ -60,13 +60,13 @@ USAGE | |||
60 | 60 | ||
61 | mkfs.exofs --pid=65536 --format /dev/osd0 | 61 | mkfs.exofs --pid=65536 --format /dev/osd0 |
62 | 62 | ||
63 | The --format is optional if not specified no OSD_FORMAT will be | 63 | The --format is optional. If not specified, no OSD_FORMAT will be |
64 | preformed and a clean file system will be created in the specified pid, | 64 | performed and a clean file system will be created in the specified pid, |
65 | in the available space of the target. (Use --format=size_in_meg to limit | 65 | in the available space of the target. (Use --format=size_in_meg to limit |
66 | the total LUN space available) | 66 | the total LUN space available) |
67 | 67 | ||
68 | If pid already exist it will be deleted and a new one will be created in it's | 68 | If pid already exists, it will be deleted and a new one will be created in |
69 | place. Be careful. | 69 | its place. Be careful. |
70 | 70 | ||
71 | An exofs lives inside a single OSD partition. You can create multiple exofs | 71 | An exofs lives inside a single OSD partition. You can create multiple exofs |
72 | filesystems on the same device using multiple pids. | 72 | filesystems on the same device using multiple pids. |
@@ -81,7 +81,7 @@ USAGE | |||
81 | 81 | ||
82 | 7. For reference (See do-exofs example script): | 82 | 7. For reference (See do-exofs example script): |
83 | do-exofs start - an example of how to perform the above steps. | 83 | do-exofs start - an example of how to perform the above steps. |
84 | do-exofs stop - an example of how to unmount the file system. | 84 | do-exofs stop - an example of how to unmount the file system. |
85 | do-exofs format - an example of how to format and mkfs a new exofs. | 85 | do-exofs format - an example of how to format and mkfs a new exofs. |
86 | 86 | ||
87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): | 87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): |
@@ -104,8 +104,8 @@ Where: | |||
104 | exofs specific options: Options are separated by commas (,) | 104 | exofs specific options: Options are separated by commas (,) |
105 | pid=<integer> - The partition number to mount/create as | 105 | pid=<integer> - The partition number to mount/create as |
106 | container of the filesystem. | 106 | container of the filesystem. |
107 | This option is mandatory | 107 | This option is mandatory. |
108 | to=<integer> - Timeout in ticks for a single command | 108 | to=<integer> - Timeout in ticks for a single command. |
109 | default is (60 * HZ) [for debugging only] | 109 | default is (60 * HZ) [for debugging only] |
110 | 110 | ||
111 | =============================================================================== | 111 | =============================================================================== |
@@ -116,7 +116,7 @@ DESIGN | |||
116 | with a special ID (defined in common.h). | 116 | with a special ID (defined in common.h). |
117 | Information included in the file system control block is used to fill the | 117 | Information included in the file system control block is used to fill the |
118 | in-memory superblock structure at mount time. This object is created before | 118 | in-memory superblock structure at mount time. This object is created before |
119 | the file system is used by mkexofs.c It contains information such as: | 119 | the file system is used by mkexofs.c. It contains information such as: |
120 | - The file system's magic number | 120 | - The file system's magic number |
121 | - The next inode number to be allocated | 121 | - The next inode number to be allocated |
122 | 122 | ||
@@ -134,8 +134,8 @@ DESIGN | |||
134 | attributes. This applies to both regular files and other types (directories, | 134 | attributes. This applies to both regular files and other types (directories, |
135 | device files, symlinks, etc.). | 135 | device files, symlinks, etc.). |
136 | 136 | ||
137 | * Credentials are generated per object (inode and superblock) when they is | 137 | * Credentials are generated per object (inode and superblock) when they are |
138 | created in memory (read off disk or created). The credential works for all | 138 | created in memory (read from disk or created). The credential works for all |
139 | operations and is used as long as the object remains in memory. | 139 | operations and is used as long as the object remains in memory. |
140 | 140 | ||
141 | * Async OSD operations are used whenever possible, but the target may execute | 141 | * Async OSD operations are used whenever possible, but the target may execute |
@@ -145,7 +145,8 @@ DESIGN | |||
145 | from executing in reverse order: | 145 | from executing in reverse order: |
146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED | 146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED |
147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - | 147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - |
148 | in create's callback function, and when we successfully do a read_inode. | 148 | in create's callback function, and when we successfully do a |
149 | read_inode. | ||
149 | OBJ_2BCREATED is set in the beginning of the create function, so we | 150 | OBJ_2BCREATED is set in the beginning of the create function, so we |
150 | know that we should wait. | 151 | know that we should wait. |
151 | - create/delete: delete should wait until the object is created | 152 | - create/delete: delete should wait until the object is created |
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 05d5cf1d743f..867c5b50cb42 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -32,8 +32,8 @@ journal_dev=devnum When the external journal device's major/minor numbers | |||
32 | identified through its new major/minor numbers encoded | 32 | identified through its new major/minor numbers encoded |
33 | in devnum. | 33 | in devnum. |
34 | 34 | ||
35 | noload Don't load the journal on mounting. Note that this forces | 35 | norecovery Don't load the journal on mounting. Note that this forces |
36 | mount of inconsistent filesystem, which can lead to | 36 | noload mount of inconsistent filesystem, which can lead to |
37 | various problems. | 37 | various problems. |
38 | 38 | ||
39 | data=journal All data are committed into the journal prior to being | 39 | data=journal All data are committed into the journal prior to being |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 6d94e0696f8c..e1def1786e50 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -153,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers | |||
153 | identified through its new major/minor numbers encoded | 153 | identified through its new major/minor numbers encoded |
154 | in devnum. | 154 | in devnum. |
155 | 155 | ||
156 | noload Don't load the journal on mounting. Note that | 156 | norecovery Don't load the journal on mounting. Note that |
157 | if the filesystem was not unmounted cleanly, | 157 | noload if the filesystem was not unmounted cleanly, |
158 | skipping the journal replay will lead to the | 158 | skipping the journal replay will lead to the |
159 | filesystem containing inconsistencies that can | 159 | filesystem containing inconsistencies that can |
160 | lead to any number of problems. | 160 | lead to any number of problems. |
@@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support | |||
196 | also be used to enable or disable barriers, for | 196 | also be used to enable or disable barriers, for |
197 | consistency with other ext4 mount options. | 197 | consistency with other ext4 mount options. |
198 | 198 | ||
199 | inode_readahead=n This tuning parameter controls the maximum | 199 | inode_readahead_blks=n This tuning parameter controls the maximum |
200 | number of inode table blocks that ext4's inode | 200 | number of inode table blocks that ext4's inode |
201 | table readahead algorithm will pre-read into | 201 | table readahead algorithm will pre-read into |
202 | the buffer cache. The default value is 32 blocks. | 202 | the buffer cache. The default value is 32 blocks. |
@@ -353,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as | |||
353 | system crashes before the delayed allocation | 353 | system crashes before the delayed allocation |
354 | blocks are forced to disk. | 354 | blocks are forced to disk. |
355 | 355 | ||
356 | discard Controls whether ext4 should issue discard/TRIM | ||
357 | nodiscard(*) commands to the underlying block device when | ||
358 | blocks are freed. This is useful for SSD devices | ||
359 | and sparse/thinly-provisioned LUNs, but it is off | ||
360 | by default until sufficient testing has been done. | ||
361 | |||
356 | Data Mode | 362 | Data Mode |
357 | ========= | 363 | ========= |
358 | There are 3 different data modes: | 364 | There are 3 different data modes: |
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX new file mode 100644 index 000000000000..2f68cd688769 --- /dev/null +++ b/Documentation/filesystems/nfs/00-INDEX | |||
@@ -0,0 +1,16 @@ | |||
1 | 00-INDEX | ||
2 | - this file (nfs-related documentation). | ||
3 | Exporting | ||
4 | - explanation of how to make filesystems exportable. | ||
5 | knfsd-stats.txt | ||
6 | - statistics which the NFS server makes available to user space. | ||
7 | nfs.txt | ||
8 | - nfs client, and DNS resolution for fs_locations. | ||
9 | nfs41-server.txt | ||
10 | - info on the Linux server implementation of NFSv4 minor version 1. | ||
11 | nfs-rdma.txt | ||
12 | - how to install and setup the Linux NFS/RDMA client and server software | ||
13 | nfsroot.txt | ||
14 | - short guide on setting up a diskless box with NFS root filesystem. | ||
15 | rpc-cache.txt | ||
16 | - introduction to the caching mechanisms in the sunrpc layer. | ||
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/nfs/Exporting index 87019d2b5981..87019d2b5981 100644 --- a/Documentation/filesystems/Exporting +++ b/Documentation/filesystems/nfs/Exporting | |||
diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt index 64ced5149d37..64ced5149d37 100644 --- a/Documentation/filesystems/knfsd-stats.txt +++ b/Documentation/filesystems/nfs/knfsd-stats.txt | |||
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt index e386f7e4bcee..e386f7e4bcee 100644 --- a/Documentation/filesystems/nfs-rdma.txt +++ b/Documentation/filesystems/nfs/nfs-rdma.txt | |||
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs/nfs.txt index f50f26ce6cd0..f50f26ce6cd0 100644 --- a/Documentation/filesystems/nfs.txt +++ b/Documentation/filesystems/nfs/nfs.txt | |||
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 5920fe26e6ff..6a53a84afc72 100644 --- a/Documentation/filesystems/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt | |||
@@ -17,8 +17,7 @@ kernels must turn 4.1 on or off *before* turning support for version 4 | |||
17 | on or off; rpc.nfsd does this correctly.) | 17 | on or off; rpc.nfsd does this correctly.) |
18 | 18 | ||
19 | The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based | 19 | The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based |
20 | on the latest NFSv4.1 Internet Draft: | 20 | on RFC 5661. |
21 | http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 | ||
22 | 21 | ||
23 | From the many new features in NFSv4.1 the current implementation | 22 | From the many new features in NFSv4.1 the current implementation |
24 | focuses on the mandatory-to-implement NFSv4.1 Sessions, providing | 23 | focuses on the mandatory-to-implement NFSv4.1 Sessions, providing |
@@ -41,10 +40,10 @@ interoperability problems with future clients. Known issues: | |||
41 | conformant with the spec (for example, we don't use kerberos | 40 | conformant with the spec (for example, we don't use kerberos |
42 | on the backchannel correctly). | 41 | on the backchannel correctly). |
43 | - no trunking support: no clients currently take advantage of | 42 | - no trunking support: no clients currently take advantage of |
44 | trunking, but this is a mandatory failure, and its use is | 43 | trunking, but this is a mandatory feature, and its use is |
45 | recommended to clients in a number of places. (E.g. to ensure | 44 | recommended to clients in a number of places. (E.g. to ensure |
46 | timely renewal in case an existing connection's retry timeouts | 45 | timely renewal in case an existing connection's retry timeouts |
47 | have gotten too long; see section 8.3 of the draft.) | 46 | have gotten too long; see section 8.3 of the RFC.) |
48 | Therefore, lack of this feature may cause future clients to | 47 | Therefore, lack of this feature may cause future clients to |
49 | fail. | 48 | fail. |
50 | - Incomplete backchannel support: incomplete backchannel gss | 49 | - Incomplete backchannel support: incomplete backchannel gss |
@@ -213,3 +212,10 @@ The following cases aren't supported yet: | |||
213 | DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. | 212 | DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. |
214 | * DESTROY_SESSION MUST be the final operation in the COMPOUND request. | 213 | * DESTROY_SESSION MUST be the final operation in the COMPOUND request. |
215 | 214 | ||
215 | Nonstandard compound limitations: | ||
216 | * No support for a sessions fore channel RPC compound that requires both a | ||
217 | ca_maxrequestsize request and a ca_maxresponsesize reply, so we may | ||
218 | fail to live up to the promise we made in CREATE_SESSION fore channel | ||
219 | negotiation. | ||
220 | * No more than one IO operation (read, write, readdir) allowed per | ||
221 | compound. | ||
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 3ba0b945aaf8..3ba0b945aaf8 100644 --- a/Documentation/filesystems/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
diff --git a/Documentation/filesystems/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt index 8a382bea6808..8a382bea6808 100644 --- a/Documentation/filesystems/rpc-cache.txt +++ b/Documentation/filesystems/nfs/rpc-cache.txt | |||
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 01539f410676..cf6d0d85ca82 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -28,7 +28,7 @@ described in the man pages included in the package. | |||
28 | Project web page: http://www.nilfs.org/en/ | 28 | Project web page: http://www.nilfs.org/en/ |
29 | Download page: http://www.nilfs.org/en/download.html | 29 | Download page: http://www.nilfs.org/en/download.html |
30 | Git tree web page: http://www.nilfs.org/git/ | 30 | Git tree web page: http://www.nilfs.org/git/ |
31 | NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users | 31 | List info: http://vger.kernel.org/vger-lists.html#linux-nilfs |
32 | 32 | ||
33 | Caveats | 33 | Caveats |
34 | ======= | 34 | ======= |
@@ -49,8 +49,7 @@ Mount options | |||
49 | NILFS2 supports the following mount options: | 49 | NILFS2 supports the following mount options: |
50 | (*) == default | 50 | (*) == default |
51 | 51 | ||
52 | barrier=on(*) This enables/disables barriers. barrier=off disables | 52 | nobarrier Disables barriers. |
53 | it, barrier=on enables it. | ||
54 | errors=continue(*) Keep going on a filesystem error. | 53 | errors=continue(*) Keep going on a filesystem error. |
55 | errors=remount-ro Remount the filesystem read-only on an error. | 54 | errors=remount-ro Remount the filesystem read-only on an error. |
56 | errors=panic Panic and halt the machine if an error occurs. | 55 | errors=panic Panic and halt the machine if an error occurs. |
@@ -71,6 +70,13 @@ order=strict Apply strict in-order semantics that preserves sequence | |||
71 | blocks. That means, it is guaranteed that no | 70 | blocks. That means, it is guaranteed that no |
72 | overtaking of events occurs in the recovered file | 71 | overtaking of events occurs in the recovered file |
73 | system after a crash. | 72 | system after a crash. |
73 | norecovery Disable recovery of the filesystem on mount. | ||
74 | This disables every write access on the device for | ||
75 | read-only mounts or snapshots. This option will fail | ||
76 | for r/w mounts on an unclean volume. | ||
77 | discard Issue discard/TRIM commands to the underlying block | ||
78 | device when blocks are freed. This is useful for SSD | ||
79 | devices and sparse/thinly-provisioned LUNs. | ||
74 | 80 | ||
75 | NILFS2 usage | 81 | NILFS2 usage |
76 | ============ | 82 | ============ |
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c2a0871280a0..c58b9f5ba002 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects. | |||
20 | Authors in alphabetical order: | 20 | Authors in alphabetical order: |
21 | Joel Becker <joel.becker@oracle.com> | 21 | Joel Becker <joel.becker@oracle.com> |
22 | Zach Brown <zach.brown@oracle.com> | 22 | Zach Brown <zach.brown@oracle.com> |
23 | Mark Fasheh <mark.fasheh@oracle.com> | 23 | Mark Fasheh <mfasheh@suse.com> |
24 | Kurt Hackel <kurt.hackel@oracle.com> | 24 | Kurt Hackel <kurt.hackel@oracle.com> |
25 | Tao Ma <tao.ma@oracle.com> | ||
25 | Sunil Mushran <sunil.mushran@oracle.com> | 26 | Sunil Mushran <sunil.mushran@oracle.com> |
26 | Manish Singh <manish.singh@oracle.com> | 27 | Manish Singh <manish.singh@oracle.com> |
28 | Tiger Yang <tiger.yang@oracle.com> | ||
27 | 29 | ||
28 | Caveats | 30 | Caveats |
29 | ======= | 31 | ======= |
30 | Features which OCFS2 does not support yet: | 32 | Features which OCFS2 does not support yet: |
31 | - quotas | ||
32 | - Directory change notification (F_NOTIFY) | 33 | - Directory change notification (F_NOTIFY) |
33 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | 34 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) |
34 | 35 | ||
@@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata | |||
70 | performance. | 71 | performance. |
71 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too | 72 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too |
72 | large, the fs will silently revert it to the default. | 73 | large, the fs will silently revert it to the default. |
73 | Localalloc is not enabled for local mounts. | ||
74 | localflocks This disables cluster aware flock. | 74 | localflocks This disables cluster aware flock. |
75 | inode64 Indicates that Ocfs2 is allowed to create inodes at | 75 | inode64 Indicates that Ocfs2 is allowed to create inodes at |
76 | any location in the filesystem, including those which | 76 | any location in the filesystem, including those which |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 92b888d540a6..a7e9746ee7ea 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -140,7 +140,7 @@ Callers of notify_change() need ->i_mutex now. | |||
140 | New super_block field "struct export_operations *s_export_op" for | 140 | New super_block field "struct export_operations *s_export_op" for |
141 | explicit support for exporting, e.g. via NFS. The structure is fully | 141 | explicit support for exporting, e.g. via NFS. The structure is fully |
142 | documented at its declaration in include/linux/fs.h, and in | 142 | documented at its declaration in include/linux/fs.h, and in |
143 | Documentation/filesystems/Exporting. | 143 | Documentation/filesystems/nfs/Exporting. |
144 | 144 | ||
145 | Briefly it allows for the definition of decode_fh and encode_fh operations | 145 | Briefly it allows for the definition of decode_fh and encode_fh operations |
146 | to encode and decode filehandles, and allows the filesystem to use | 146 | to encode and decode filehandles, and allows the filesystem to use |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f945546b..96a44dd95e03 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -38,6 +38,7 @@ Table of Contents | |||
38 | 3.3 /proc/<pid>/io - Display the IO accounting fields | 38 | 3.3 /proc/<pid>/io - Display the IO accounting fields |
39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings | 39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings |
40 | 3.5 /proc/<pid>/mountinfo - Information about mounts | 40 | 3.5 /proc/<pid>/mountinfo - Information about mounts |
41 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm | ||
41 | 42 | ||
42 | 43 | ||
43 | ------------------------------------------------------------------------------ | 44 | ------------------------------------------------------------------------------ |
@@ -163,6 +164,7 @@ read the file /proc/PID/status: | |||
163 | VmExe: 68 kB | 164 | VmExe: 68 kB |
164 | VmLib: 1412 kB | 165 | VmLib: 1412 kB |
165 | VmPTE: 20 kb | 166 | VmPTE: 20 kb |
167 | VmSwap: 0 kB | ||
166 | Threads: 1 | 168 | Threads: 1 |
167 | SigQ: 0/28578 | 169 | SigQ: 0/28578 |
168 | SigPnd: 0000000000000000 | 170 | SigPnd: 0000000000000000 |
@@ -176,7 +178,6 @@ read the file /proc/PID/status: | |||
176 | CapBnd: ffffffffffffffff | 178 | CapBnd: ffffffffffffffff |
177 | voluntary_ctxt_switches: 0 | 179 | voluntary_ctxt_switches: 0 |
178 | nonvoluntary_ctxt_switches: 1 | 180 | nonvoluntary_ctxt_switches: 1 |
179 | Stack usage: 12 kB | ||
180 | 181 | ||
181 | This shows you nearly the same information you would get if you viewed it with | 182 | This shows you nearly the same information you would get if you viewed it with |
182 | the ps command. In fact, ps uses the proc file system to obtain its | 183 | the ps command. In fact, ps uses the proc file system to obtain its |
@@ -188,6 +189,12 @@ memory usage. Its seven fields are explained in Table 1-3. The stat file | |||
188 | contains details information about the process itself. Its fields are | 189 | contains details information about the process itself. Its fields are |
189 | explained in Table 1-4. | 190 | explained in Table 1-4. |
190 | 191 | ||
192 | (for SMP CONFIG users) | ||
193 | For making accounting scalable, RSS related information are handled in | ||
194 | asynchronous manner and the vaule may not be very precise. To see a precise | ||
195 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. | ||
196 | It's slow but very precise. | ||
197 | |||
191 | Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | 198 | Table 1-2: Contents of the statm files (as of 2.6.30-rc7) |
192 | .............................................................................. | 199 | .............................................................................. |
193 | Field Content | 200 | Field Content |
@@ -213,6 +220,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | |||
213 | VmExe size of text segment | 220 | VmExe size of text segment |
214 | VmLib size of shared library code | 221 | VmLib size of shared library code |
215 | VmPTE size of page table entries | 222 | VmPTE size of page table entries |
223 | VmSwap size of swap usage (the number of referred swapents) | ||
216 | Threads number of threads | 224 | Threads number of threads |
217 | SigQ number of signals queued/max. number for queue | 225 | SigQ number of signals queued/max. number for queue |
218 | SigPnd bitmap of pending signals for the thread | 226 | SigPnd bitmap of pending signals for the thread |
@@ -230,7 +238,6 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | |||
230 | Mems_allowed_list Same as previous, but in "list format" | 238 | Mems_allowed_list Same as previous, but in "list format" |
231 | voluntary_ctxt_switches number of voluntary context switches | 239 | voluntary_ctxt_switches number of voluntary context switches |
232 | nonvoluntary_ctxt_switches number of non voluntary context switches | 240 | nonvoluntary_ctxt_switches number of non voluntary context switches |
233 | Stack usage: stack usage high water mark (round up to page size) | ||
234 | .............................................................................. | 241 | .............................................................................. |
235 | 242 | ||
236 | Table 1-3: Contents of the statm files (as of 2.6.8-rc3) | 243 | Table 1-3: Contents of the statm files (as of 2.6.8-rc3) |
@@ -431,6 +438,7 @@ Table 1-5: Kernel info in /proc | |||
431 | modules List of loaded modules | 438 | modules List of loaded modules |
432 | mounts Mounted filesystems | 439 | mounts Mounted filesystems |
433 | net Networking info (see text) | 440 | net Networking info (see text) |
441 | pagetypeinfo Additional page allocator information (see text) (2.5) | ||
434 | partitions Table of partitions known to the system | 442 | partitions Table of partitions known to the system |
435 | pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, | 443 | pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, |
436 | decoupled by lspci (2.4) | 444 | decoupled by lspci (2.4) |
@@ -585,7 +593,7 @@ Node 0, zone DMA 0 4 5 4 4 3 ... | |||
585 | Node 0, zone Normal 1 0 0 1 101 8 ... | 593 | Node 0, zone Normal 1 0 0 1 101 8 ... |
586 | Node 0, zone HighMem 2 0 0 1 1 0 ... | 594 | Node 0, zone HighMem 2 0 0 1 1 0 ... |
587 | 595 | ||
588 | Memory fragmentation is a problem under some workloads, and buddyinfo is a | 596 | External fragmentation is a problem under some workloads, and buddyinfo is a |
589 | useful tool for helping diagnose these problems. Buddyinfo will give you a | 597 | useful tool for helping diagnose these problems. Buddyinfo will give you a |
590 | clue as to how big an area you can safely allocate, or why a previous | 598 | clue as to how big an area you can safely allocate, or why a previous |
591 | allocation failed. | 599 | allocation failed. |
@@ -595,6 +603,48 @@ available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in | |||
595 | ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE | 603 | ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE |
596 | available in ZONE_NORMAL, etc... | 604 | available in ZONE_NORMAL, etc... |
597 | 605 | ||
606 | More information relevant to external fragmentation can be found in | ||
607 | pagetypeinfo. | ||
608 | |||
609 | > cat /proc/pagetypeinfo | ||
610 | Page block order: 9 | ||
611 | Pages per block: 512 | ||
612 | |||
613 | Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 | ||
614 | Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0 | ||
615 | Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0 | ||
616 | Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2 | ||
617 | Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0 | ||
618 | Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0 | ||
619 | Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9 | ||
620 | Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0 | ||
621 | Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452 | ||
622 | Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0 | ||
623 | Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0 | ||
624 | |||
625 | Number of blocks type Unmovable Reclaimable Movable Reserve Isolate | ||
626 | Node 0, zone DMA 2 0 5 1 0 | ||
627 | Node 0, zone DMA32 41 6 967 2 0 | ||
628 | |||
629 | Fragmentation avoidance in the kernel works by grouping pages of different | ||
630 | migrate types into the same contiguous regions of memory called page blocks. | ||
631 | A page block is typically the size of the default hugepage size e.g. 2MB on | ||
632 | X86-64. By keeping pages grouped based on their ability to move, the kernel | ||
633 | can reclaim pages within a page block to satisfy a high-order allocation. | ||
634 | |||
635 | The pagetypinfo begins with information on the size of a page block. It | ||
636 | then gives the same type of information as buddyinfo except broken down | ||
637 | by migrate-type and finishes with details on how many page blocks of each | ||
638 | type exist. | ||
639 | |||
640 | If min_free_kbytes has been tuned correctly (recommendations made by hugeadm | ||
641 | from libhugetlbfs http://sourceforge.net/projects/libhugetlbfs/), one can | ||
642 | make an estimate of the likely number of huge pages that can be allocated | ||
643 | at a given point in time. All the "Movable" blocks should be allocatable | ||
644 | unless memory has been mlock()'d. Some of the Reclaimable blocks should | ||
645 | also be allocatable although a lot of filesystem metadata may have to be | ||
646 | reclaimed to achieve this. | ||
647 | |||
598 | .............................................................................. | 648 | .............................................................................. |
599 | 649 | ||
600 | meminfo: | 650 | meminfo: |
@@ -1072,7 +1122,8 @@ second). The meanings of the columns are as follows, from left to right: | |||
1072 | - irq: servicing interrupts | 1122 | - irq: servicing interrupts |
1073 | - softirq: servicing softirqs | 1123 | - softirq: servicing softirqs |
1074 | - steal: involuntary wait | 1124 | - steal: involuntary wait |
1075 | - guest: running a guest | 1125 | - guest: running a normal guest |
1126 | - guest_nice: running a niced guest | ||
1076 | 1127 | ||
1077 | The "intr" line gives counts of interrupts serviced since boot time, for each | 1128 | The "intr" line gives counts of interrupts serviced since boot time, for each |
1078 | of the possible system interrupts. The first column is the total of all | 1129 | of the possible system interrupts. The first column is the total of all |
@@ -1088,8 +1139,8 @@ The "processes" line gives the number of processes and threads created, which | |||
1088 | includes (but is not limited to) those created by calls to the fork() and | 1139 | includes (but is not limited to) those created by calls to the fork() and |
1089 | clone() system calls. | 1140 | clone() system calls. |
1090 | 1141 | ||
1091 | The "procs_running" line gives the number of processes currently running on | 1142 | The "procs_running" line gives the total number of threads that are |
1092 | CPUs. | 1143 | running or ready to run (i.e., the total number of runnable threads). |
1093 | 1144 | ||
1094 | The "procs_blocked" line gives the number of processes currently blocked, | 1145 | The "procs_blocked" line gives the number of processes currently blocked, |
1095 | waiting for I/O to complete. | 1146 | waiting for I/O to complete. |
@@ -1408,3 +1459,11 @@ For more information on mount propagation see: | |||
1408 | 1459 | ||
1409 | Documentation/filesystems/sharedsubtree.txt | 1460 | Documentation/filesystems/sharedsubtree.txt |
1410 | 1461 | ||
1462 | |||
1463 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm | ||
1464 | -------------------------------------------------------- | ||
1465 | These files provide a method to access a tasks comm value. It also allows for | ||
1466 | a task to set its own or one of its thread siblings comm value. The comm value | ||
1467 | is limited in size compared to the cmdline value, so writing anything longer | ||
1468 | then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated | ||
1469 | comm value. | ||
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt index 0d15ebccf5b0..a1e2e0dda907 100644 --- a/Documentation/filesystems/seq_file.txt +++ b/Documentation/filesystems/seq_file.txt | |||
@@ -248,9 +248,7 @@ code, that is done in the initialization code in the usual way: | |||
248 | { | 248 | { |
249 | struct proc_dir_entry *entry; | 249 | struct proc_dir_entry *entry; |
250 | 250 | ||
251 | entry = create_proc_entry("sequence", 0, NULL); | 251 | proc_create("sequence", 0, NULL, &ct_file_ops); |
252 | if (entry) | ||
253 | entry->proc_fops = &ct_file_ops; | ||
254 | return 0; | 252 | return 0; |
255 | } | 253 | } |
256 | 254 | ||
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 23a181074f94..fc0e39af43c3 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
@@ -837,6 +837,9 @@ replicas continue to be exactly same. | |||
837 | individual lists does not affect propagation or the way propagation | 837 | individual lists does not affect propagation or the way propagation |
838 | tree is modified by operations. | 838 | tree is modified by operations. |
839 | 839 | ||
840 | All vfsmounts in a peer group have the same ->mnt_master. If it is | ||
841 | non-NULL, they form a contiguous (ordered) segment of slave list. | ||
842 | |||
840 | A example propagation tree looks as shown in the figure below. | 843 | A example propagation tree looks as shown in the figure below. |
841 | [ NOTE: Though it looks like a forest, if we consider all the shared | 844 | [ NOTE: Though it looks like a forest, if we consider all the shared |
842 | mounts as a conceptual entity called 'pnode', it becomes a tree] | 845 | mounts as a conceptual entity called 'pnode', it becomes a tree] |
@@ -874,8 +877,19 @@ replicas continue to be exactly same. | |||
874 | 877 | ||
875 | NOTE: The propagation tree is orthogonal to the mount tree. | 878 | NOTE: The propagation tree is orthogonal to the mount tree. |
876 | 879 | ||
880 | 8B Locking: | ||
881 | |||
882 | ->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected | ||
883 | by namespace_sem (exclusive for modifications, shared for reading). | ||
884 | |||
885 | Normally we have ->mnt_flags modifications serialized by vfsmount_lock. | ||
886 | There are two exceptions: do_add_mount() and clone_mnt(). | ||
887 | The former modifies a vfsmount that has not been visible in any shared | ||
888 | data structures yet. | ||
889 | The latter holds namespace_sem and the only references to vfsmount | ||
890 | are in lists that can't be traversed without namespace_sem. | ||
877 | 891 | ||
878 | 8B Algorithm: | 892 | 8C Algorithm: |
879 | 893 | ||
880 | The crux of the implementation resides in rbind/move operation. | 894 | The crux of the implementation resides in rbind/move operation. |
881 | 895 | ||
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index b245d524d568..931c806642c5 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -91,8 +91,8 @@ struct device_attribute { | |||
91 | const char *buf, size_t count); | 91 | const char *buf, size_t count); |
92 | }; | 92 | }; |
93 | 93 | ||
94 | int device_create_file(struct device *, struct device_attribute *); | 94 | int device_create_file(struct device *, const struct device_attribute *); |
95 | void device_remove_file(struct device *, struct device_attribute *); | 95 | void device_remove_file(struct device *, const struct device_attribute *); |
96 | 96 | ||
97 | It also defines this helper for defining device attributes: | 97 | It also defines this helper for defining device attributes: |
98 | 98 | ||
@@ -316,8 +316,8 @@ DEVICE_ATTR(_name, _mode, _show, _store); | |||
316 | 316 | ||
317 | Creation/Removal: | 317 | Creation/Removal: |
318 | 318 | ||
319 | int device_create_file(struct device *device, struct device_attribute * attr); | 319 | int device_create_file(struct device *dev, const struct device_attribute * attr); |
320 | void device_remove_file(struct device * dev, struct device_attribute * attr); | 320 | void device_remove_file(struct device *dev, const struct device_attribute * attr); |
321 | 321 | ||
322 | 322 | ||
323 | - bus drivers (include/linux/device.h) | 323 | - bus drivers (include/linux/device.h) |
@@ -358,7 +358,7 @@ DRIVER_ATTR(_name, _mode, _show, _store) | |||
358 | 358 | ||
359 | Creation/Removal: | 359 | Creation/Removal: |
360 | 360 | ||
361 | int driver_create_file(struct device_driver *, struct driver_attribute *); | 361 | int driver_create_file(struct device_driver *, const struct driver_attribute *); |
362 | void driver_remove_file(struct device_driver *, struct driver_attribute *); | 362 | void driver_remove_file(struct device_driver *, const struct driver_attribute *); |
363 | 363 | ||
364 | 364 | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 623f094c9d8d..3de2f32edd90 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -472,7 +472,7 @@ __sync_single_inode) to check if ->writepages has been successful in | |||
472 | writing out the whole address_space. | 472 | writing out the whole address_space. |
473 | 473 | ||
474 | The Writeback tag is used by filemap*wait* and sync_page* functions, | 474 | The Writeback tag is used by filemap*wait* and sync_page* functions, |
475 | via wait_on_page_writeback_range, to wait for all writeback to | 475 | via filemap_fdatawait_range, to wait for all writeback to |
476 | complete. While waiting ->sync_page (if defined) will be called on | 476 | complete. While waiting ->sync_page (if defined) will be called on |
477 | each page that is found to require writeback. | 477 | each page that is found to require writeback. |
478 | 478 | ||