diff options
Diffstat (limited to 'Documentation/filesystems')
29 files changed, 609 insertions, 119 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index f15621ee5599..3bae418c6ad3 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -1,7 +1,5 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file (info on some of the filesystems supported by linux). | 2 | - this file (info on some of the filesystems supported by linux). |
3 | Exporting | ||
4 | - explanation of how to make filesystems exportable. | ||
5 | Locking | 3 | Locking |
6 | - info on locking rules as they pertain to Linux VFS. | 4 | - info on locking rules as they pertain to Linux VFS. |
7 | 9p.txt | 5 | 9p.txt |
@@ -34,8 +32,12 @@ dlmfs.txt | |||
34 | - info on the userspace interface to the OCFS2 DLM. | 32 | - info on the userspace interface to the OCFS2 DLM. |
35 | dnotify.txt | 33 | dnotify.txt |
36 | - info about directory notification in Linux. | 34 | - info about directory notification in Linux. |
35 | dnotify_test.c | ||
36 | - example program for dnotify | ||
37 | ecryptfs.txt | 37 | ecryptfs.txt |
38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. | 38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
39 | exofs.txt | ||
40 | - info, usage, mount options, design about EXOFS. | ||
39 | ext2.txt | 41 | ext2.txt |
40 | - info, mount options and specifications for the Ext2 filesystem. | 42 | - info, mount options and specifications for the Ext2 filesystem. |
41 | ext3.txt | 43 | ext3.txt |
@@ -62,16 +64,14 @@ jfs.txt | |||
62 | - info and mount options for the JFS filesystem. | 64 | - info and mount options for the JFS filesystem. |
63 | locks.txt | 65 | locks.txt |
64 | - info on file locking implementations, flock() vs. fcntl(), etc. | 66 | - info on file locking implementations, flock() vs. fcntl(), etc. |
67 | logfs.txt | ||
68 | - info on the LogFS flash filesystem. | ||
65 | mandatory-locking.txt | 69 | mandatory-locking.txt |
66 | - info on the Linux implementation of Sys V mandatory file locking. | 70 | - info on the Linux implementation of Sys V mandatory file locking. |
67 | ncpfs.txt | 71 | ncpfs.txt |
68 | - info on Novell Netware(tm) filesystem using NCP protocol. | 72 | - info on Novell Netware(tm) filesystem using NCP protocol. |
69 | nfs41-server.txt | 73 | nfs/ |
70 | - info on the Linux server implementation of NFSv4 minor version 1. | 74 | - nfs-related documentation. |
71 | nfs-rdma.txt | ||
72 | - how to install and setup the Linux NFS/RDMA client and server software. | ||
73 | nfsroot.txt | ||
74 | - short guide on setting up a diskless box with NFS root filesystem. | ||
75 | nilfs2.txt | 75 | nilfs2.txt |
76 | - info and mount options for the NILFS2 filesystem. | 76 | - info and mount options for the NILFS2 filesystem. |
77 | ntfs.txt | 77 | ntfs.txt |
@@ -90,8 +90,6 @@ relay.txt | |||
90 | - info on relay, for efficient streaming from kernel to user space. | 90 | - info on relay, for efficient streaming from kernel to user space. |
91 | romfs.txt | 91 | romfs.txt |
92 | - description of the ROMFS filesystem. | 92 | - description of the ROMFS filesystem. |
93 | rpc-cache.txt | ||
94 | - introduction to the caching mechanisms in the sunrpc layer. | ||
95 | seq_file.txt | 93 | seq_file.txt |
96 | - how to use the seq_file API | 94 | - how to use the seq_file API |
97 | sharedsubtree.txt | 95 | sharedsubtree.txt |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 18b9d0ca0630..06bbbed71206 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -460,13 +460,6 @@ in sys_read() and friends. | |||
460 | 460 | ||
461 | --------------------------- dquot_operations ------------------------------- | 461 | --------------------------- dquot_operations ------------------------------- |
462 | prototypes: | 462 | prototypes: |
463 | int (*initialize) (struct inode *, int); | ||
464 | int (*drop) (struct inode *); | ||
465 | int (*alloc_space) (struct inode *, qsize_t, int); | ||
466 | int (*alloc_inode) (const struct inode *, unsigned long); | ||
467 | int (*free_space) (struct inode *, qsize_t); | ||
468 | int (*free_inode) (const struct inode *, unsigned long); | ||
469 | int (*transfer) (struct inode *, struct iattr *); | ||
470 | int (*write_dquot) (struct dquot *); | 463 | int (*write_dquot) (struct dquot *); |
471 | int (*acquire_dquot) (struct dquot *); | 464 | int (*acquire_dquot) (struct dquot *); |
472 | int (*release_dquot) (struct dquot *); | 465 | int (*release_dquot) (struct dquot *); |
@@ -479,13 +472,6 @@ a proper locking wrt the filesystem and call the generic quota operations. | |||
479 | What filesystem should expect from the generic quota functions: | 472 | What filesystem should expect from the generic quota functions: |
480 | 473 | ||
481 | FS recursion Held locks when called | 474 | FS recursion Held locks when called |
482 | initialize: yes maybe dqonoff_sem | ||
483 | drop: yes - | ||
484 | alloc_space: ->mark_dirty() - | ||
485 | alloc_inode: ->mark_dirty() - | ||
486 | free_space: ->mark_dirty() - | ||
487 | free_inode: ->mark_dirty() - | ||
488 | transfer: yes - | ||
489 | write_dquot: yes dqonoff_sem or dqptr_sem | 475 | write_dquot: yes dqonoff_sem or dqptr_sem |
490 | acquire_dquot: yes dqonoff_sem or dqptr_sem | 476 | acquire_dquot: yes dqonoff_sem or dqptr_sem |
491 | release_dquot: yes dqonoff_sem or dqptr_sem | 477 | release_dquot: yes dqonoff_sem or dqptr_sem |
@@ -495,10 +481,6 @@ write_info: yes dqonoff_sem | |||
495 | FS recursion means calling ->quota_read() and ->quota_write() from superblock | 481 | FS recursion means calling ->quota_read() and ->quota_write() from superblock |
496 | operations. | 482 | operations. |
497 | 483 | ||
498 | ->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called | ||
499 | only directly by the filesystem and do not call any fs functions only | ||
500 | the ->mark_dirty() operation. | ||
501 | |||
502 | More details about quota locking can be found in fs/dquot.c. | 484 | More details about quota locking can be found in fs/dquot.c. |
503 | 485 | ||
504 | --------------------------- vm_operations_struct ----------------------------- | 486 | --------------------------- vm_operations_struct ----------------------------- |
diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile new file mode 100644 index 000000000000..a5dd114da14f --- /dev/null +++ b/Documentation/filesystems/Makefile | |||
@@ -0,0 +1,8 @@ | |||
1 | # kbuild trick to avoid linker error. Can be omitted if a module is built. | ||
2 | obj- := dummy.o | ||
3 | |||
4 | # List of programs to build | ||
5 | hostprogs-y := dnotify_test | ||
6 | |||
7 | # Tell kbuild to always build the programs | ||
8 | always := $(hostprogs-y) | ||
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b9491d89..a91e2e2095b0 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -235,6 +235,7 @@ proc files. | |||
235 | neg=N Number of negative lookups made | 235 | neg=N Number of negative lookups made |
236 | pos=N Number of positive lookups made | 236 | pos=N Number of positive lookups made |
237 | crt=N Number of objects created by lookup | 237 | crt=N Number of objects created by lookup |
238 | tmo=N Number of lookups timed out and requeued | ||
238 | Updates n=N Number of update cookie requests seen | 239 | Updates n=N Number of update cookie requests seen |
239 | nul=N Number of upd reqs given a NULL parent | 240 | nul=N Number of upd reqs given a NULL parent |
240 | run=N Number of upd reqs granted CPU time | 241 | run=N Number of upd reqs granted CPU time |
@@ -250,8 +251,10 @@ proc files. | |||
250 | ok=N Number of successful alloc reqs | 251 | ok=N Number of successful alloc reqs |
251 | wt=N Number of alloc reqs that waited on lookup completion | 252 | wt=N Number of alloc reqs that waited on lookup completion |
252 | nbf=N Number of alloc reqs rejected -ENOBUFS | 253 | nbf=N Number of alloc reqs rejected -ENOBUFS |
254 | int=N Number of alloc reqs aborted -ERESTARTSYS | ||
253 | ops=N Number of alloc reqs submitted | 255 | ops=N Number of alloc reqs submitted |
254 | owt=N Number of alloc reqs waited for CPU time | 256 | owt=N Number of alloc reqs waited for CPU time |
257 | abt=N Number of alloc reqs aborted due to object death | ||
255 | Retrvls n=N Number of retrieval (read) requests seen | 258 | Retrvls n=N Number of retrieval (read) requests seen |
256 | ok=N Number of successful retr reqs | 259 | ok=N Number of successful retr reqs |
257 | wt=N Number of retr reqs that waited on lookup completion | 260 | wt=N Number of retr reqs that waited on lookup completion |
@@ -261,6 +264,7 @@ proc files. | |||
261 | oom=N Number of retr reqs failed -ENOMEM | 264 | oom=N Number of retr reqs failed -ENOMEM |
262 | ops=N Number of retr reqs submitted | 265 | ops=N Number of retr reqs submitted |
263 | owt=N Number of retr reqs waited for CPU time | 266 | owt=N Number of retr reqs waited for CPU time |
267 | abt=N Number of retr reqs aborted due to object death | ||
264 | Stores n=N Number of storage (write) requests seen | 268 | Stores n=N Number of storage (write) requests seen |
265 | ok=N Number of successful store reqs | 269 | ok=N Number of successful store reqs |
266 | agn=N Number of store reqs on a page already pending storage | 270 | agn=N Number of store reqs on a page already pending storage |
@@ -268,12 +272,37 @@ proc files. | |||
268 | oom=N Number of store reqs failed -ENOMEM | 272 | oom=N Number of store reqs failed -ENOMEM |
269 | ops=N Number of store reqs submitted | 273 | ops=N Number of store reqs submitted |
270 | run=N Number of store reqs granted CPU time | 274 | run=N Number of store reqs granted CPU time |
275 | pgs=N Number of pages given store req processing time | ||
276 | rxd=N Number of store reqs deleted from tracking tree | ||
277 | olm=N Number of store reqs over store limit | ||
278 | VmScan nos=N Number of release reqs against pages with no pending store | ||
279 | gon=N Number of release reqs against pages stored by time lock granted | ||
280 | bsy=N Number of release reqs ignored due to in-progress store | ||
281 | can=N Number of page stores cancelled due to release req | ||
271 | Ops pend=N Number of times async ops added to pending queues | 282 | Ops pend=N Number of times async ops added to pending queues |
272 | run=N Number of times async ops given CPU time | 283 | run=N Number of times async ops given CPU time |
273 | enq=N Number of times async ops queued for processing | 284 | enq=N Number of times async ops queued for processing |
285 | can=N Number of async ops cancelled | ||
286 | rej=N Number of async ops rejected due to object lookup/create failure | ||
274 | dfr=N Number of async ops queued for deferred release | 287 | dfr=N Number of async ops queued for deferred release |
275 | rel=N Number of async ops released | 288 | rel=N Number of async ops released |
276 | gc=N Number of deferred-release async ops garbage collected | 289 | gc=N Number of deferred-release async ops garbage collected |
290 | CacheOp alo=N Number of in-progress alloc_object() cache ops | ||
291 | luo=N Number of in-progress lookup_object() cache ops | ||
292 | luc=N Number of in-progress lookup_complete() cache ops | ||
293 | gro=N Number of in-progress grab_object() cache ops | ||
294 | upo=N Number of in-progress update_object() cache ops | ||
295 | dro=N Number of in-progress drop_object() cache ops | ||
296 | pto=N Number of in-progress put_object() cache ops | ||
297 | syn=N Number of in-progress sync_cache() cache ops | ||
298 | atc=N Number of in-progress attr_changed() cache ops | ||
299 | rap=N Number of in-progress read_or_alloc_page() cache ops | ||
300 | ras=N Number of in-progress read_or_alloc_pages() cache ops | ||
301 | alp=N Number of in-progress allocate_page() cache ops | ||
302 | als=N Number of in-progress allocate_pages() cache ops | ||
303 | wrp=N Number of in-progress write_page() cache ops | ||
304 | ucp=N Number of in-progress uncache_page() cache ops | ||
305 | dsp=N Number of in-progress dissociate_pages() cache ops | ||
277 | 306 | ||
278 | 307 | ||
279 | (*) /proc/fs/fscache/histogram | 308 | (*) /proc/fs/fscache/histogram |
@@ -299,6 +328,87 @@ proc files. | |||
299 | jiffy range covered, and the SECS field the equivalent number of seconds. | 328 | jiffy range covered, and the SECS field the equivalent number of seconds. |
300 | 329 | ||
301 | 330 | ||
331 | =========== | ||
332 | OBJECT LIST | ||
333 | =========== | ||
334 | |||
335 | If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a | ||
336 | list of all the objects currently allocated and allow them to be viewed | ||
337 | through: | ||
338 | |||
339 | /proc/fs/fscache/objects | ||
340 | |||
341 | This will look something like: | ||
342 | |||
343 | [root@andromeda ~]# head /proc/fs/fscache/objects | ||
344 | OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA | ||
345 | ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ | ||
346 | 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a | ||
347 | 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a | ||
348 | |||
349 | where the first set of columns before the '|' describe the object: | ||
350 | |||
351 | COLUMN DESCRIPTION | ||
352 | ======= =============================================================== | ||
353 | OBJECT Object debugging ID (appears as OBJ%x in some debug messages) | ||
354 | PARENT Debugging ID of parent object | ||
355 | STAT Object state | ||
356 | CHLDN Number of child objects of this object | ||
357 | OPS Number of outstanding operations on this object | ||
358 | OOP Number of outstanding child object management operations | ||
359 | IPR | ||
360 | EX Number of outstanding exclusive operations | ||
361 | READS Number of outstanding read operations | ||
362 | EM Object's event mask | ||
363 | EV Events raised on this object | ||
364 | F Object flags | ||
365 | S Object slow-work work item flags | ||
366 | |||
367 | and the second set of columns describe the object's cookie, if present: | ||
368 | |||
369 | COLUMN DESCRIPTION | ||
370 | =============== ======================================================= | ||
371 | NETFS_COOKIE_DEF Name of netfs cookie definition | ||
372 | TY Cookie type (IX - index, DT - data, hex - special) | ||
373 | FL Cookie flags | ||
374 | NETFS_DATA Netfs private data stored in the cookie | ||
375 | OBJECT_KEY Object key } 1 column, with separating comma | ||
376 | AUX_DATA Object aux data } presence may be configured | ||
377 | |||
378 | The data shown may be filtered by attaching the a key to an appropriate keyring | ||
379 | before viewing the file. Something like: | ||
380 | |||
381 | keyctl add user fscache:objlist <restrictions> @s | ||
382 | |||
383 | where <restrictions> are a selection of the following letters: | ||
384 | |||
385 | K Show hexdump of object key (don't show if not given) | ||
386 | A Show hexdump of object aux data (don't show if not given) | ||
387 | |||
388 | and the following paired letters: | ||
389 | |||
390 | C Show objects that have a cookie | ||
391 | c Show objects that don't have a cookie | ||
392 | B Show objects that are busy | ||
393 | b Show objects that aren't busy | ||
394 | W Show objects that have pending writes | ||
395 | w Show objects that don't have pending writes | ||
396 | R Show objects that have outstanding reads | ||
397 | r Show objects that don't have outstanding reads | ||
398 | S Show objects that have slow work queued | ||
399 | s Show objects that don't have slow work queued | ||
400 | |||
401 | If neither side of a letter pair is given, then both are implied. For example: | ||
402 | |||
403 | keyctl add user fscache:objlist KB @s | ||
404 | |||
405 | shows objects that are busy, and lists their object keys, but does not dump | ||
406 | their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is | ||
407 | not implied. | ||
408 | |||
409 | By default all objects and all fields will be shown. | ||
410 | |||
411 | |||
302 | ========= | 412 | ========= |
303 | DEBUGGING | 413 | DEBUGGING |
304 | ========= | 414 | ========= |
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1ed5e9e..1902c57b72ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). | |||
641 | 641 | ||
642 | Furthermore, note that this does not cancel the asynchronous read or write | 642 | Furthermore, note that this does not cancel the asynchronous read or write |
643 | operation started by the read/alloc and write functions, so the page | 643 | operation started by the read/alloc and write functions, so the page |
644 | invalidation and release functions must use: | 644 | invalidation functions must use: |
645 | 645 | ||
646 | bool fscache_check_page_write(struct fscache_cookie *cookie, | 646 | bool fscache_check_page_write(struct fscache_cookie *cookie, |
647 | struct page *page); | 647 | struct page *page); |
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: | |||
654 | to wait for it to finish if it is. | 654 | to wait for it to finish if it is. |
655 | 655 | ||
656 | 656 | ||
657 | When releasepage() is being implemented, a special FS-Cache function exists to | ||
658 | manage the heuristics of coping with vmscan trying to eject pages, which may | ||
659 | conflict with the cache trying to write pages to the cache (which may itself | ||
660 | need to allocate memory): | ||
661 | |||
662 | bool fscache_maybe_release_page(struct fscache_cookie *cookie, | ||
663 | struct page *page, | ||
664 | gfp_t gfp); | ||
665 | |||
666 | This takes the netfs cookie, and the page and gfp arguments as supplied to | ||
667 | releasepage(). It will return false if the page cannot be released yet for | ||
668 | some reason and if it returns true, the page has been uncached and can now be | ||
669 | released. | ||
670 | |||
671 | To make a page available for release, this function may wait for an outstanding | ||
672 | storage request to complete, or it may attempt to cancel the storage request - | ||
673 | in which case the page will not be stored in the cache this time. | ||
674 | |||
675 | |||
657 | ========================== | 676 | ========================== |
658 | INDEX AND DATA FILE UPDATE | 677 | INDEX AND DATA FILE UPDATE |
659 | ========================== | 678 | ========================== |
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt index 4c0c575a4012..79334ed5daa7 100644 --- a/Documentation/filesystems/dentry-locking.txt +++ b/Documentation/filesystems/dentry-locking.txt | |||
@@ -62,7 +62,8 @@ changes are : | |||
62 | 2. Insertion of a dentry into the hash table is done using | 62 | 2. Insertion of a dentry into the hash table is done using |
63 | hlist_add_head_rcu() which take care of ordering the writes - the | 63 | hlist_add_head_rcu() which take care of ordering the writes - the |
64 | writes to the dentry must be visible before the dentry is | 64 | writes to the dentry must be visible before the dentry is |
65 | inserted. This works in conjunction with hlist_for_each_rcu() while | 65 | inserted. This works in conjunction with hlist_for_each_rcu(), |
66 | which has since been replaced by hlist_for_each_entry_rcu(), while | ||
66 | walking the hash chain. The only requirement is that all | 67 | walking the hash chain. The only requirement is that all |
67 | initialization to the dentry must be done before | 68 | initialization to the dentry must be done before |
68 | hlist_add_head_rcu() since we don't have dcache_lock protection | 69 | hlist_add_head_rcu() since we don't have dcache_lock protection |
diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.txt index 9f5d338ddbb8..6baf88f46859 100644 --- a/Documentation/filesystems/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt | |||
@@ -62,38 +62,9 @@ disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL. | |||
62 | 62 | ||
63 | Example | 63 | Example |
64 | ------- | 64 | ------- |
65 | See Documentation/filesystems/dnotify_test.c for an example. | ||
65 | 66 | ||
66 | #define _GNU_SOURCE /* needed to get the defines */ | 67 | NOTE |
67 | #include <fcntl.h> /* in glibc 2.2 this has the needed | 68 | ---- |
68 | values defined */ | 69 | Beginning with Linux 2.6.13, dnotify has been replaced by inotify. |
69 | #include <signal.h> | 70 | See Documentation/filesystems/inotify.txt for more information on it. |
70 | #include <stdio.h> | ||
71 | #include <unistd.h> | ||
72 | |||
73 | static volatile int event_fd; | ||
74 | |||
75 | static void handler(int sig, siginfo_t *si, void *data) | ||
76 | { | ||
77 | event_fd = si->si_fd; | ||
78 | } | ||
79 | |||
80 | int main(void) | ||
81 | { | ||
82 | struct sigaction act; | ||
83 | int fd; | ||
84 | |||
85 | act.sa_sigaction = handler; | ||
86 | sigemptyset(&act.sa_mask); | ||
87 | act.sa_flags = SA_SIGINFO; | ||
88 | sigaction(SIGRTMIN + 1, &act, NULL); | ||
89 | |||
90 | fd = open(".", O_RDONLY); | ||
91 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); | ||
92 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); | ||
93 | /* we will now be notified if any of the files | ||
94 | in "." is modified or new files are created */ | ||
95 | while (1) { | ||
96 | pause(); | ||
97 | printf("Got event on fd=%d\n", event_fd); | ||
98 | } | ||
99 | } | ||
diff --git a/Documentation/filesystems/dnotify_test.c b/Documentation/filesystems/dnotify_test.c new file mode 100644 index 000000000000..8b37b4a1e18d --- /dev/null +++ b/Documentation/filesystems/dnotify_test.c | |||
@@ -0,0 +1,34 @@ | |||
1 | #define _GNU_SOURCE /* needed to get the defines */ | ||
2 | #include <fcntl.h> /* in glibc 2.2 this has the needed | ||
3 | values defined */ | ||
4 | #include <signal.h> | ||
5 | #include <stdio.h> | ||
6 | #include <unistd.h> | ||
7 | |||
8 | static volatile int event_fd; | ||
9 | |||
10 | static void handler(int sig, siginfo_t *si, void *data) | ||
11 | { | ||
12 | event_fd = si->si_fd; | ||
13 | } | ||
14 | |||
15 | int main(void) | ||
16 | { | ||
17 | struct sigaction act; | ||
18 | int fd; | ||
19 | |||
20 | act.sa_sigaction = handler; | ||
21 | sigemptyset(&act.sa_mask); | ||
22 | act.sa_flags = SA_SIGINFO; | ||
23 | sigaction(SIGRTMIN + 1, &act, NULL); | ||
24 | |||
25 | fd = open(".", O_RDONLY); | ||
26 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); | ||
27 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); | ||
28 | /* we will now be notified if any of the files | ||
29 | in "." is modified or new files are created */ | ||
30 | while (1) { | ||
31 | pause(); | ||
32 | printf("Got event on fd=%d\n", event_fd); | ||
33 | } | ||
34 | } | ||
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt index 0ced74c2f73c..abd2a9b5b787 100644 --- a/Documentation/filesystems/exofs.txt +++ b/Documentation/filesystems/exofs.txt | |||
@@ -60,13 +60,13 @@ USAGE | |||
60 | 60 | ||
61 | mkfs.exofs --pid=65536 --format /dev/osd0 | 61 | mkfs.exofs --pid=65536 --format /dev/osd0 |
62 | 62 | ||
63 | The --format is optional if not specified no OSD_FORMAT will be | 63 | The --format is optional. If not specified, no OSD_FORMAT will be |
64 | preformed and a clean file system will be created in the specified pid, | 64 | performed and a clean file system will be created in the specified pid, |
65 | in the available space of the target. (Use --format=size_in_meg to limit | 65 | in the available space of the target. (Use --format=size_in_meg to limit |
66 | the total LUN space available) | 66 | the total LUN space available) |
67 | 67 | ||
68 | If pid already exist it will be deleted and a new one will be created in it's | 68 | If pid already exists, it will be deleted and a new one will be created in |
69 | place. Be careful. | 69 | its place. Be careful. |
70 | 70 | ||
71 | An exofs lives inside a single OSD partition. You can create multiple exofs | 71 | An exofs lives inside a single OSD partition. You can create multiple exofs |
72 | filesystems on the same device using multiple pids. | 72 | filesystems on the same device using multiple pids. |
@@ -81,7 +81,7 @@ USAGE | |||
81 | 81 | ||
82 | 7. For reference (See do-exofs example script): | 82 | 7. For reference (See do-exofs example script): |
83 | do-exofs start - an example of how to perform the above steps. | 83 | do-exofs start - an example of how to perform the above steps. |
84 | do-exofs stop - an example of how to unmount the file system. | 84 | do-exofs stop - an example of how to unmount the file system. |
85 | do-exofs format - an example of how to format and mkfs a new exofs. | 85 | do-exofs format - an example of how to format and mkfs a new exofs. |
86 | 86 | ||
87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): | 87 | 8. Extra compilation flags (uncomment in fs/exofs/Kbuild): |
@@ -104,8 +104,8 @@ Where: | |||
104 | exofs specific options: Options are separated by commas (,) | 104 | exofs specific options: Options are separated by commas (,) |
105 | pid=<integer> - The partition number to mount/create as | 105 | pid=<integer> - The partition number to mount/create as |
106 | container of the filesystem. | 106 | container of the filesystem. |
107 | This option is mandatory | 107 | This option is mandatory. |
108 | to=<integer> - Timeout in ticks for a single command | 108 | to=<integer> - Timeout in ticks for a single command. |
109 | default is (60 * HZ) [for debugging only] | 109 | default is (60 * HZ) [for debugging only] |
110 | 110 | ||
111 | =============================================================================== | 111 | =============================================================================== |
@@ -116,7 +116,7 @@ DESIGN | |||
116 | with a special ID (defined in common.h). | 116 | with a special ID (defined in common.h). |
117 | Information included in the file system control block is used to fill the | 117 | Information included in the file system control block is used to fill the |
118 | in-memory superblock structure at mount time. This object is created before | 118 | in-memory superblock structure at mount time. This object is created before |
119 | the file system is used by mkexofs.c It contains information such as: | 119 | the file system is used by mkexofs.c. It contains information such as: |
120 | - The file system's magic number | 120 | - The file system's magic number |
121 | - The next inode number to be allocated | 121 | - The next inode number to be allocated |
122 | 122 | ||
@@ -134,8 +134,8 @@ DESIGN | |||
134 | attributes. This applies to both regular files and other types (directories, | 134 | attributes. This applies to both regular files and other types (directories, |
135 | device files, symlinks, etc.). | 135 | device files, symlinks, etc.). |
136 | 136 | ||
137 | * Credentials are generated per object (inode and superblock) when they is | 137 | * Credentials are generated per object (inode and superblock) when they are |
138 | created in memory (read off disk or created). The credential works for all | 138 | created in memory (read from disk or created). The credential works for all |
139 | operations and is used as long as the object remains in memory. | 139 | operations and is used as long as the object remains in memory. |
140 | 140 | ||
141 | * Async OSD operations are used whenever possible, but the target may execute | 141 | * Async OSD operations are used whenever possible, but the target may execute |
@@ -145,7 +145,8 @@ DESIGN | |||
145 | from executing in reverse order: | 145 | from executing in reverse order: |
146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED | 146 | - The following are handled with the OBJ_CREATED and OBJ_2BCREATED |
147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - | 147 | flags. OBJ_CREATED is set when we know the object exists on the OSD - |
148 | in create's callback function, and when we successfully do a read_inode. | 148 | in create's callback function, and when we successfully do a |
149 | read_inode. | ||
149 | OBJ_2BCREATED is set in the beginning of the create function, so we | 150 | OBJ_2BCREATED is set in the beginning of the create function, so we |
150 | know that we should wait. | 151 | know that we should wait. |
151 | - create/delete: delete should wait until the object is created | 152 | - create/delete: delete should wait until the object is created |
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 570f9bd9be2b..867c5b50cb42 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -32,8 +32,8 @@ journal_dev=devnum When the external journal device's major/minor numbers | |||
32 | identified through its new major/minor numbers encoded | 32 | identified through its new major/minor numbers encoded |
33 | in devnum. | 33 | in devnum. |
34 | 34 | ||
35 | noload Don't load the journal on mounting. Note that this forces | 35 | norecovery Don't load the journal on mounting. Note that this forces |
36 | mount of inconsistent filesystem, which can lead to | 36 | noload mount of inconsistent filesystem, which can lead to |
37 | various problems. | 37 | various problems. |
38 | 38 | ||
39 | data=journal All data are committed into the journal prior to being | 39 | data=journal All data are committed into the journal prior to being |
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks. | |||
123 | 123 | ||
124 | sb=n Use alternate superblock at this location. | 124 | sb=n Use alternate superblock at this location. |
125 | 125 | ||
126 | quota | 126 | quota These options are ignored by the filesystem. They |
127 | noquota | 127 | noquota are used only by quota tools to recognize volumes |
128 | grpquota | 128 | grpquota where quota should be turned on. See documentation |
129 | usrquota | 129 | usrquota in the quota-tools package for more details |
130 | (http://sourceforge.net/projects/linuxquota). | ||
131 | |||
132 | jqfmt=<quota type> These options tell filesystem details about quota | ||
133 | usrjquota=<file> so that quota information can be properly updated | ||
134 | grpjquota=<file> during journal replay. They replace the above | ||
135 | quota options. See documentation in the quota-tools | ||
136 | package for more details | ||
137 | (http://sourceforge.net/projects/linuxquota). | ||
130 | 138 | ||
131 | bh (*) ext3 associates buffer heads to data pages to | 139 | bh (*) ext3 associates buffer heads to data pages to |
132 | nobh (a) cache disk block mapping information | 140 | nobh (a) cache disk block mapping information |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 18b5ec8cea45..e1def1786e50 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -134,9 +134,15 @@ ro Mount filesystem read only. Note that ext4 will | |||
134 | mount options "ro,noload" can be used to prevent | 134 | mount options "ro,noload" can be used to prevent |
135 | writes to the filesystem. | 135 | writes to the filesystem. |
136 | 136 | ||
137 | journal_checksum Enable checksumming of the journal transactions. | ||
138 | This will allow the recovery code in e2fsck and the | ||
139 | kernel to detect corruption in the kernel. It is a | ||
140 | compatible change and will be ignored by older kernels. | ||
141 | |||
137 | journal_async_commit Commit block can be written to disk without waiting | 142 | journal_async_commit Commit block can be written to disk without waiting |
138 | for descriptor blocks. If enabled older kernels cannot | 143 | for descriptor blocks. If enabled older kernels cannot |
139 | mount the device. | 144 | mount the device. This will enable 'journal_checksum' |
145 | internally. | ||
140 | 146 | ||
141 | journal=update Update the ext4 file system's journal to the current | 147 | journal=update Update the ext4 file system's journal to the current |
142 | format. | 148 | format. |
@@ -147,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers | |||
147 | identified through its new major/minor numbers encoded | 153 | identified through its new major/minor numbers encoded |
148 | in devnum. | 154 | in devnum. |
149 | 155 | ||
150 | noload Don't load the journal on mounting. Note that | 156 | norecovery Don't load the journal on mounting. Note that |
151 | if the filesystem was not unmounted cleanly, | 157 | noload if the filesystem was not unmounted cleanly, |
152 | skipping the journal replay will lead to the | 158 | skipping the journal replay will lead to the |
153 | filesystem containing inconsistencies that can | 159 | filesystem containing inconsistencies that can |
154 | lead to any number of problems. | 160 | lead to any number of problems. |
@@ -190,7 +196,7 @@ nobarrier This also requires an IO stack which can support | |||
190 | also be used to enable or disable barriers, for | 196 | also be used to enable or disable barriers, for |
191 | consistency with other ext4 mount options. | 197 | consistency with other ext4 mount options. |
192 | 198 | ||
193 | inode_readahead=n This tuning parameter controls the maximum | 199 | inode_readahead_blks=n This tuning parameter controls the maximum |
194 | number of inode table blocks that ext4's inode | 200 | number of inode table blocks that ext4's inode |
195 | table readahead algorithm will pre-read into | 201 | table readahead algorithm will pre-read into |
196 | the buffer cache. The default value is 32 blocks. | 202 | the buffer cache. The default value is 32 blocks. |
@@ -282,9 +288,16 @@ stripe=n Number of filesystem blocks that mballoc will try | |||
282 | to use for allocation size and alignment. For RAID5/6 | 288 | to use for allocation size and alignment. For RAID5/6 |
283 | systems this should be the number of data | 289 | systems this should be the number of data |
284 | disks * RAID chunk size in file system blocks. | 290 | disks * RAID chunk size in file system blocks. |
285 | delalloc (*) Deferring block allocation until write-out time. | 291 | |
286 | nodelalloc Disable delayed allocation. Blocks are allocation | 292 | delalloc (*) Defer block allocation until just before ext4 |
287 | when data is copied from user to page cache. | 293 | writes out the block(s) in question. This |
294 | allows ext4 to better allocation decisions | ||
295 | more efficiently. | ||
296 | nodelalloc Disable delayed allocation. Blocks are allocated | ||
297 | when the data is copied from userspace to the | ||
298 | page cache, either via the write(2) system call | ||
299 | or when an mmap'ed page which was previously | ||
300 | unallocated is written for the first time. | ||
288 | 301 | ||
289 | max_batch_time=usec Maximum amount of time ext4 should wait for | 302 | max_batch_time=usec Maximum amount of time ext4 should wait for |
290 | additional filesystem operations to be batch | 303 | additional filesystem operations to be batch |
@@ -340,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as | |||
340 | system crashes before the delayed allocation | 353 | system crashes before the delayed allocation |
341 | blocks are forced to disk. | 354 | blocks are forced to disk. |
342 | 355 | ||
356 | discard Controls whether ext4 should issue discard/TRIM | ||
357 | nodiscard(*) commands to the underlying block device when | ||
358 | blocks are freed. This is useful for SSD devices | ||
359 | and sparse/thinly-provisioned LUNs, but it is off | ||
360 | by default until sufficient testing has been done. | ||
361 | |||
343 | Data Mode | 362 | Data Mode |
344 | ========= | 363 | ========= |
345 | There are 3 different data modes: | 364 | There are 3 different data modes: |
diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt new file mode 100644 index 000000000000..e64c94ba401a --- /dev/null +++ b/Documentation/filesystems/logfs.txt | |||
@@ -0,0 +1,241 @@ | |||
1 | |||
2 | The LogFS Flash Filesystem | ||
3 | ========================== | ||
4 | |||
5 | Specification | ||
6 | ============= | ||
7 | |||
8 | Superblocks | ||
9 | ----------- | ||
10 | |||
11 | Two superblocks exist at the beginning and end of the filesystem. | ||
12 | Each superblock is 256 Bytes large, with another 3840 Bytes reserved | ||
13 | for future purposes, making a total of 4096 Bytes. | ||
14 | |||
15 | Superblock locations may differ for MTD and block devices. On MTD the | ||
16 | first non-bad block contains a superblock in the first 4096 Bytes and | ||
17 | the last non-bad block contains a superblock in the last 4096 Bytes. | ||
18 | On block devices, the first 4096 Bytes of the device contain the first | ||
19 | superblock and the last aligned 4096 Byte-block contains the second | ||
20 | superblock. | ||
21 | |||
22 | For the most part, the superblocks can be considered read-only. They | ||
23 | are written only to correct errors detected within the superblocks, | ||
24 | move the journal and change the filesystem parameters through tunefs. | ||
25 | As a result, the superblock does not contain any fields that require | ||
26 | constant updates, like the amount of free space, etc. | ||
27 | |||
28 | Segments | ||
29 | -------- | ||
30 | |||
31 | The space in the device is split up into equal-sized segments. | ||
32 | Segments are the primary write unit of LogFS. Within each segments, | ||
33 | writes happen from front (low addresses) to back (high addresses. If | ||
34 | only a partial segment has been written, the segment number, the | ||
35 | current position within and optionally a write buffer are stored in | ||
36 | the journal. | ||
37 | |||
38 | Segments are erased as a whole. Therefore Garbage Collection may be | ||
39 | required to completely free a segment before doing so. | ||
40 | |||
41 | Journal | ||
42 | -------- | ||
43 | |||
44 | The journal contains all global information about the filesystem that | ||
45 | is subject to frequent change. At mount time, it has to be scanned | ||
46 | for the most recent commit entry, which contains a list of pointers to | ||
47 | all currently valid entries. | ||
48 | |||
49 | Object Store | ||
50 | ------------ | ||
51 | |||
52 | All space except for the superblocks and journal is part of the object | ||
53 | store. Each segment contains a segment header and a number of | ||
54 | objects, each consisting of the object header and the payload. | ||
55 | Objects are either inodes, directory entries (dentries), file data | ||
56 | blocks or indirect blocks. | ||
57 | |||
58 | Levels | ||
59 | ------ | ||
60 | |||
61 | Garbage collection (GC) may fail if all data is written | ||
62 | indiscriminately. One requirement of GC is that data is seperated | ||
63 | roughly according to the distance between the tree root and the data. | ||
64 | Effectively that means all file data is on level 0, indirect blocks | ||
65 | are on levels 1, 2, 3 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks, | ||
66 | respectively. Inode file data is on level 6 for the inodes and 7-11 | ||
67 | for indirect blocks. | ||
68 | |||
69 | Each segment contains objects of a single level only. As a result, | ||
70 | each level requires its own seperate segment to be open for writing. | ||
71 | |||
72 | Inode File | ||
73 | ---------- | ||
74 | |||
75 | All inodes are stored in a special file, the inode file. Single | ||
76 | exception is the inode file's inode (master inode) which for obvious | ||
77 | reasons is stored in the journal instead. Instead of data blocks, the | ||
78 | leaf nodes of the inode files are inodes. | ||
79 | |||
80 | Aliases | ||
81 | ------- | ||
82 | |||
83 | Writes in LogFS are done by means of a wandering tree. A naïve | ||
84 | implementation would require that for each write or a block, all | ||
85 | parent blocks are written as well, since the block pointers have | ||
86 | changed. Such an implementation would not be very efficient. | ||
87 | |||
88 | In LogFS, the block pointer changes are cached in the journal by means | ||
89 | of alias entries. Each alias consists of its logical address - inode | ||
90 | number, block index, level and child number (index into block) - and | ||
91 | the changed data. Any 8-byte word can be changes in this manner. | ||
92 | |||
93 | Currently aliases are used for block pointers, file size, file used | ||
94 | bytes and the height of an inodes indirect tree. | ||
95 | |||
96 | Segment Aliases | ||
97 | --------------- | ||
98 | |||
99 | Related to regular aliases, these are used to handle bad blocks. | ||
100 | Initially, bad blocks are handled by moving the affected segment | ||
101 | content to a spare segment and noting this move in the journal with a | ||
102 | segment alias, a simple (to, from) tupel. GC will later empty this | ||
103 | segment and the alias can be removed again. This is used on MTD only. | ||
104 | |||
105 | Vim | ||
106 | --- | ||
107 | |||
108 | By cleverly predicting the life time of data, it is possible to | ||
109 | seperate long-living data from short-living data and thereby reduce | ||
110 | the GC overhead later. Each type of distinc life expectency (vim) can | ||
111 | have a seperate segment open for writing. Each (level, vim) tupel can | ||
112 | be open just once. If an open segment with unknown vim is encountered | ||
113 | at mount time, it is closed and ignored henceforth. | ||
114 | |||
115 | Indirect Tree | ||
116 | ------------- | ||
117 | |||
118 | Inodes in LogFS are similar to FFS-style filesystems with direct and | ||
119 | indirect block pointers. One difference is that LogFS uses a single | ||
120 | indirect pointer that can be either a 1x, 2x, etc. indirect pointer. | ||
121 | A height field in the inode defines the height of the indirect tree | ||
122 | and thereby the indirection of the pointer. | ||
123 | |||
124 | Another difference is the addressing of indirect blocks. In LogFS, | ||
125 | the first 16 pointers in the first indirect block are left empty, | ||
126 | corresponding to the 16 direct pointers in the inode. In ext2 (maybe | ||
127 | others as well) the first pointer in the first indirect block | ||
128 | corresponds to logical block 12, skipping the 12 direct pointers. | ||
129 | So where ext2 is using arithmetic to better utilize space, LogFS keeps | ||
130 | arithmetic simple and uses compression to save space. | ||
131 | |||
132 | Compression | ||
133 | ----------- | ||
134 | |||
135 | Both file data and metadata can be compressed. Compression for file | ||
136 | data can be enabled with chattr +c and disabled with chattr -c. Doing | ||
137 | so has no effect on existing data, but new data will be stored | ||
138 | accordingly. New inodes will inherit the compression flag of the | ||
139 | parent directory. | ||
140 | |||
141 | Metadata is always compressed. However, the space accounting ignores | ||
142 | this and charges for the uncompressed size. Failing to do so could | ||
143 | result in GC failures when, after moving some data, indirect blocks | ||
144 | compress worse than previously. Even on a 100% full medium, GC may | ||
145 | not consume any extra space, so the compression gains are lost space | ||
146 | to the user. | ||
147 | |||
148 | However, they are not lost space to the filesystem internals. By | ||
149 | cheating the user for those bytes, the filesystem gained some slack | ||
150 | space and GC will run less often and faster. | ||
151 | |||
152 | Garbage Collection and Wear Leveling | ||
153 | ------------------------------------ | ||
154 | |||
155 | Garbage collection is invoked whenever the number of free segments | ||
156 | falls below a threshold. The best (known) candidate is picked based | ||
157 | on the least amount of valid data contained in the segment. All | ||
158 | remaining valid data is copied elsewhere, thereby invalidating it. | ||
159 | |||
160 | The GC code also checks for aliases and writes then back if their | ||
161 | number gets too large. | ||
162 | |||
163 | Wear leveling is done by occasionally picking a suboptimal segment for | ||
164 | garbage collection. If a stale segments erase count is significantly | ||
165 | lower than the active segments' erase counts, it will be picked. Wear | ||
166 | leveling is rate limited, so it will never monopolize the device for | ||
167 | more than one segment worth at a time. | ||
168 | |||
169 | Values for "occasionally", "significantly lower" are compile time | ||
170 | constants. | ||
171 | |||
172 | Hashed directories | ||
173 | ------------------ | ||
174 | |||
175 | To satisfy efficient lookup(), directory entries are hashed and | ||
176 | located based on the hash. In order to both support large directories | ||
177 | and not be overly inefficient for small directories, several hash | ||
178 | tables of increasing size are used. For each table, the hash value | ||
179 | modulo the table size gives the table index. | ||
180 | |||
181 | Tables sizes are chosen to limit the number of indirect blocks with a | ||
182 | fully populated table to 0, 1, 2 or 3 respectively. So the first | ||
183 | table contains 16 entries, the second 512-16, etc. | ||
184 | |||
185 | The last table is special in several ways. First its size depends on | ||
186 | the effective 32bit limit on telldir/seekdir cookies. Since logfs | ||
187 | uses the upper half of the address space for indirect blocks, the size | ||
188 | is limited to 2^31. Secondly the table contains hash buckets with 16 | ||
189 | entries each. | ||
190 | |||
191 | Using single-entry buckets would result in birthday "attacks". At | ||
192 | just 2^16 used entries, hash collisions would be likely (P >= 0.5). | ||
193 | My math skills are insufficient to do the combinatorics for the 17x | ||
194 | collisions necessary to overflow a bucket, but testing showed that in | ||
195 | 10,000 runs the lowest directory fill before a bucket overflow was | ||
196 | 188,057,130 entries with an average of 315,149,915 entries. So for | ||
197 | directory sizes of up to a million, bucket overflows should be | ||
198 | virtually impossible under normal circumstances. | ||
199 | |||
200 | With carefully chosen filenames, it is obviously possible to cause an | ||
201 | overflow with just 21 entries (4 higher tables + 16 entries + 1). So | ||
202 | there may be a security concern if a malicious user has write access | ||
203 | to a directory. | ||
204 | |||
205 | Open For Discussion | ||
206 | =================== | ||
207 | |||
208 | Device Address Space | ||
209 | -------------------- | ||
210 | |||
211 | A device address space is used for caching. Both block devices and | ||
212 | MTD provide functions to either read a single page or write a segment. | ||
213 | Partial segments may be written for data integrity, but where possible | ||
214 | complete segments are written for performance on simple block device | ||
215 | flash media. | ||
216 | |||
217 | Meta Inodes | ||
218 | ----------- | ||
219 | |||
220 | Inodes are stored in the inode file, which is just a regular file for | ||
221 | most purposes. At umount time, however, the inode file needs to | ||
222 | remain open until all dirty inodes are written. So | ||
223 | generic_shutdown_super() may not close this inode, but shouldn't | ||
224 | complain about remaining inodes due to the inode file either. Same | ||
225 | goes for mapping inode of the device address space. | ||
226 | |||
227 | Currently logfs uses a hack that essentially copies part of fs/inode.c | ||
228 | code over. A general solution would be preferred. | ||
229 | |||
230 | Indirect block mapping | ||
231 | ---------------------- | ||
232 | |||
233 | With compression, the block device (or mapping inode) cannot be used | ||
234 | to cache indirect blocks. Some other place is required. Currently | ||
235 | logfs uses the top half of each inode's address space. The low 8TB | ||
236 | (on 32bit) are filled with file data, the high 8TB are used for | ||
237 | indirect blocks. | ||
238 | |||
239 | One problem is that 16TB files created on 64bit systems actually have | ||
240 | data in the top 8TB. But files >16TB would cause problems anyway, so | ||
241 | only the limit has changed. | ||
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX new file mode 100644 index 000000000000..2f68cd688769 --- /dev/null +++ b/Documentation/filesystems/nfs/00-INDEX | |||
@@ -0,0 +1,16 @@ | |||
1 | 00-INDEX | ||
2 | - this file (nfs-related documentation). | ||
3 | Exporting | ||
4 | - explanation of how to make filesystems exportable. | ||
5 | knfsd-stats.txt | ||
6 | - statistics which the NFS server makes available to user space. | ||
7 | nfs.txt | ||
8 | - nfs client, and DNS resolution for fs_locations. | ||
9 | nfs41-server.txt | ||
10 | - info on the Linux server implementation of NFSv4 minor version 1. | ||
11 | nfs-rdma.txt | ||
12 | - how to install and setup the Linux NFS/RDMA client and server software | ||
13 | nfsroot.txt | ||
14 | - short guide on setting up a diskless box with NFS root filesystem. | ||
15 | rpc-cache.txt | ||
16 | - introduction to the caching mechanisms in the sunrpc layer. | ||
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/nfs/Exporting index 87019d2b5981..87019d2b5981 100644 --- a/Documentation/filesystems/Exporting +++ b/Documentation/filesystems/nfs/Exporting | |||
diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt index 64ced5149d37..64ced5149d37 100644 --- a/Documentation/filesystems/knfsd-stats.txt +++ b/Documentation/filesystems/nfs/knfsd-stats.txt | |||
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt index e386f7e4bcee..e386f7e4bcee 100644 --- a/Documentation/filesystems/nfs-rdma.txt +++ b/Documentation/filesystems/nfs/nfs-rdma.txt | |||
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs/nfs.txt index f50f26ce6cd0..f50f26ce6cd0 100644 --- a/Documentation/filesystems/nfs.txt +++ b/Documentation/filesystems/nfs/nfs.txt | |||
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 5920fe26e6ff..6a53a84afc72 100644 --- a/Documentation/filesystems/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt | |||
@@ -17,8 +17,7 @@ kernels must turn 4.1 on or off *before* turning support for version 4 | |||
17 | on or off; rpc.nfsd does this correctly.) | 17 | on or off; rpc.nfsd does this correctly.) |
18 | 18 | ||
19 | The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based | 19 | The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based |
20 | on the latest NFSv4.1 Internet Draft: | 20 | on RFC 5661. |
21 | http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 | ||
22 | 21 | ||
23 | From the many new features in NFSv4.1 the current implementation | 22 | From the many new features in NFSv4.1 the current implementation |
24 | focuses on the mandatory-to-implement NFSv4.1 Sessions, providing | 23 | focuses on the mandatory-to-implement NFSv4.1 Sessions, providing |
@@ -41,10 +40,10 @@ interoperability problems with future clients. Known issues: | |||
41 | conformant with the spec (for example, we don't use kerberos | 40 | conformant with the spec (for example, we don't use kerberos |
42 | on the backchannel correctly). | 41 | on the backchannel correctly). |
43 | - no trunking support: no clients currently take advantage of | 42 | - no trunking support: no clients currently take advantage of |
44 | trunking, but this is a mandatory failure, and its use is | 43 | trunking, but this is a mandatory feature, and its use is |
45 | recommended to clients in a number of places. (E.g. to ensure | 44 | recommended to clients in a number of places. (E.g. to ensure |
46 | timely renewal in case an existing connection's retry timeouts | 45 | timely renewal in case an existing connection's retry timeouts |
47 | have gotten too long; see section 8.3 of the draft.) | 46 | have gotten too long; see section 8.3 of the RFC.) |
48 | Therefore, lack of this feature may cause future clients to | 47 | Therefore, lack of this feature may cause future clients to |
49 | fail. | 48 | fail. |
50 | - Incomplete backchannel support: incomplete backchannel gss | 49 | - Incomplete backchannel support: incomplete backchannel gss |
@@ -213,3 +212,10 @@ The following cases aren't supported yet: | |||
213 | DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. | 212 | DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. |
214 | * DESTROY_SESSION MUST be the final operation in the COMPOUND request. | 213 | * DESTROY_SESSION MUST be the final operation in the COMPOUND request. |
215 | 214 | ||
215 | Nonstandard compound limitations: | ||
216 | * No support for a sessions fore channel RPC compound that requires both a | ||
217 | ca_maxrequestsize request and a ca_maxresponsesize reply, so we may | ||
218 | fail to live up to the promise we made in CREATE_SESSION fore channel | ||
219 | negotiation. | ||
220 | * No more than one IO operation (read, write, readdir) allowed per | ||
221 | compound. | ||
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 3ba0b945aaf8..3ba0b945aaf8 100644 --- a/Documentation/filesystems/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
diff --git a/Documentation/filesystems/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt index 8a382bea6808..8a382bea6808 100644 --- a/Documentation/filesystems/rpc-cache.txt +++ b/Documentation/filesystems/nfs/rpc-cache.txt | |||
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 01539f410676..cf6d0d85ca82 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -28,7 +28,7 @@ described in the man pages included in the package. | |||
28 | Project web page: http://www.nilfs.org/en/ | 28 | Project web page: http://www.nilfs.org/en/ |
29 | Download page: http://www.nilfs.org/en/download.html | 29 | Download page: http://www.nilfs.org/en/download.html |
30 | Git tree web page: http://www.nilfs.org/git/ | 30 | Git tree web page: http://www.nilfs.org/git/ |
31 | NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users | 31 | List info: http://vger.kernel.org/vger-lists.html#linux-nilfs |
32 | 32 | ||
33 | Caveats | 33 | Caveats |
34 | ======= | 34 | ======= |
@@ -49,8 +49,7 @@ Mount options | |||
49 | NILFS2 supports the following mount options: | 49 | NILFS2 supports the following mount options: |
50 | (*) == default | 50 | (*) == default |
51 | 51 | ||
52 | barrier=on(*) This enables/disables barriers. barrier=off disables | 52 | nobarrier Disables barriers. |
53 | it, barrier=on enables it. | ||
54 | errors=continue(*) Keep going on a filesystem error. | 53 | errors=continue(*) Keep going on a filesystem error. |
55 | errors=remount-ro Remount the filesystem read-only on an error. | 54 | errors=remount-ro Remount the filesystem read-only on an error. |
56 | errors=panic Panic and halt the machine if an error occurs. | 55 | errors=panic Panic and halt the machine if an error occurs. |
@@ -71,6 +70,13 @@ order=strict Apply strict in-order semantics that preserves sequence | |||
71 | blocks. That means, it is guaranteed that no | 70 | blocks. That means, it is guaranteed that no |
72 | overtaking of events occurs in the recovered file | 71 | overtaking of events occurs in the recovered file |
73 | system after a crash. | 72 | system after a crash. |
73 | norecovery Disable recovery of the filesystem on mount. | ||
74 | This disables every write access on the device for | ||
75 | read-only mounts or snapshots. This option will fail | ||
76 | for r/w mounts on an unclean volume. | ||
77 | discard Issue discard/TRIM commands to the underlying block | ||
78 | device when blocks are freed. This is useful for SSD | ||
79 | devices and sparse/thinly-provisioned LUNs. | ||
74 | 80 | ||
75 | NILFS2 usage | 81 | NILFS2 usage |
76 | ============ | 82 | ============ |
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c2a0871280a0..c58b9f5ba002 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects. | |||
20 | Authors in alphabetical order: | 20 | Authors in alphabetical order: |
21 | Joel Becker <joel.becker@oracle.com> | 21 | Joel Becker <joel.becker@oracle.com> |
22 | Zach Brown <zach.brown@oracle.com> | 22 | Zach Brown <zach.brown@oracle.com> |
23 | Mark Fasheh <mark.fasheh@oracle.com> | 23 | Mark Fasheh <mfasheh@suse.com> |
24 | Kurt Hackel <kurt.hackel@oracle.com> | 24 | Kurt Hackel <kurt.hackel@oracle.com> |
25 | Tao Ma <tao.ma@oracle.com> | ||
25 | Sunil Mushran <sunil.mushran@oracle.com> | 26 | Sunil Mushran <sunil.mushran@oracle.com> |
26 | Manish Singh <manish.singh@oracle.com> | 27 | Manish Singh <manish.singh@oracle.com> |
28 | Tiger Yang <tiger.yang@oracle.com> | ||
27 | 29 | ||
28 | Caveats | 30 | Caveats |
29 | ======= | 31 | ======= |
30 | Features which OCFS2 does not support yet: | 32 | Features which OCFS2 does not support yet: |
31 | - quotas | ||
32 | - Directory change notification (F_NOTIFY) | 33 | - Directory change notification (F_NOTIFY) |
33 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | 34 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) |
34 | 35 | ||
@@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata | |||
70 | performance. | 71 | performance. |
71 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too | 72 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too |
72 | large, the fs will silently revert it to the default. | 73 | large, the fs will silently revert it to the default. |
73 | Localalloc is not enabled for local mounts. | ||
74 | localflocks This disables cluster aware flock. | 74 | localflocks This disables cluster aware flock. |
75 | inode64 Indicates that Ocfs2 is allowed to create inodes at | 75 | inode64 Indicates that Ocfs2 is allowed to create inodes at |
76 | any location in the filesystem, including those which | 76 | any location in the filesystem, including those which |
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 92b888d540a6..a7e9746ee7ea 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -140,7 +140,7 @@ Callers of notify_change() need ->i_mutex now. | |||
140 | New super_block field "struct export_operations *s_export_op" for | 140 | New super_block field "struct export_operations *s_export_op" for |
141 | explicit support for exporting, e.g. via NFS. The structure is fully | 141 | explicit support for exporting, e.g. via NFS. The structure is fully |
142 | documented at its declaration in include/linux/fs.h, and in | 142 | documented at its declaration in include/linux/fs.h, and in |
143 | Documentation/filesystems/Exporting. | 143 | Documentation/filesystems/nfs/Exporting. |
144 | 144 | ||
145 | Briefly it allows for the definition of decode_fh and encode_fh operations | 145 | Briefly it allows for the definition of decode_fh and encode_fh operations |
146 | to encode and decode filehandles, and allows the filesystem to use | 146 | to encode and decode filehandles, and allows the filesystem to use |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b5aee7838a00..a4f30faa4f1f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -38,6 +38,7 @@ Table of Contents | |||
38 | 3.3 /proc/<pid>/io - Display the IO accounting fields | 38 | 3.3 /proc/<pid>/io - Display the IO accounting fields |
39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings | 39 | 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings |
40 | 3.5 /proc/<pid>/mountinfo - Information about mounts | 40 | 3.5 /proc/<pid>/mountinfo - Information about mounts |
41 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm | ||
41 | 42 | ||
42 | 43 | ||
43 | ------------------------------------------------------------------------------ | 44 | ------------------------------------------------------------------------------ |
@@ -163,6 +164,7 @@ read the file /proc/PID/status: | |||
163 | VmExe: 68 kB | 164 | VmExe: 68 kB |
164 | VmLib: 1412 kB | 165 | VmLib: 1412 kB |
165 | VmPTE: 20 kb | 166 | VmPTE: 20 kb |
167 | VmSwap: 0 kB | ||
166 | Threads: 1 | 168 | Threads: 1 |
167 | SigQ: 0/28578 | 169 | SigQ: 0/28578 |
168 | SigPnd: 0000000000000000 | 170 | SigPnd: 0000000000000000 |
@@ -176,7 +178,6 @@ read the file /proc/PID/status: | |||
176 | CapBnd: ffffffffffffffff | 178 | CapBnd: ffffffffffffffff |
177 | voluntary_ctxt_switches: 0 | 179 | voluntary_ctxt_switches: 0 |
178 | nonvoluntary_ctxt_switches: 1 | 180 | nonvoluntary_ctxt_switches: 1 |
179 | Stack usage: 12 kB | ||
180 | 181 | ||
181 | This shows you nearly the same information you would get if you viewed it with | 182 | This shows you nearly the same information you would get if you viewed it with |
182 | the ps command. In fact, ps uses the proc file system to obtain its | 183 | the ps command. In fact, ps uses the proc file system to obtain its |
@@ -188,7 +189,13 @@ memory usage. Its seven fields are explained in Table 1-3. The stat file | |||
188 | contains details information about the process itself. Its fields are | 189 | contains details information about the process itself. Its fields are |
189 | explained in Table 1-4. | 190 | explained in Table 1-4. |
190 | 191 | ||
191 | Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | 192 | (for SMP CONFIG users) |
193 | For making accounting scalable, RSS related information are handled in | ||
194 | asynchronous manner and the vaule may not be very precise. To see a precise | ||
195 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. | ||
196 | It's slow but very precise. | ||
197 | |||
198 | Table 1-2: Contents of the status files (as of 2.6.30-rc7) | ||
192 | .............................................................................. | 199 | .............................................................................. |
193 | Field Content | 200 | Field Content |
194 | Name filename of the executable | 201 | Name filename of the executable |
@@ -213,6 +220,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | |||
213 | VmExe size of text segment | 220 | VmExe size of text segment |
214 | VmLib size of shared library code | 221 | VmLib size of shared library code |
215 | VmPTE size of page table entries | 222 | VmPTE size of page table entries |
223 | VmSwap size of swap usage (the number of referred swapents) | ||
216 | Threads number of threads | 224 | Threads number of threads |
217 | SigQ number of signals queued/max. number for queue | 225 | SigQ number of signals queued/max. number for queue |
218 | SigPnd bitmap of pending signals for the thread | 226 | SigPnd bitmap of pending signals for the thread |
@@ -230,7 +238,6 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | |||
230 | Mems_allowed_list Same as previous, but in "list format" | 238 | Mems_allowed_list Same as previous, but in "list format" |
231 | voluntary_ctxt_switches number of voluntary context switches | 239 | voluntary_ctxt_switches number of voluntary context switches |
232 | nonvoluntary_ctxt_switches number of non voluntary context switches | 240 | nonvoluntary_ctxt_switches number of non voluntary context switches |
233 | Stack usage: stack usage high water mark (round up to page size) | ||
234 | .............................................................................. | 241 | .............................................................................. |
235 | 242 | ||
236 | Table 1-3: Contents of the statm files (as of 2.6.8-rc3) | 243 | Table 1-3: Contents of the statm files (as of 2.6.8-rc3) |
@@ -431,6 +438,7 @@ Table 1-5: Kernel info in /proc | |||
431 | modules List of loaded modules | 438 | modules List of loaded modules |
432 | mounts Mounted filesystems | 439 | mounts Mounted filesystems |
433 | net Networking info (see text) | 440 | net Networking info (see text) |
441 | pagetypeinfo Additional page allocator information (see text) (2.5) | ||
434 | partitions Table of partitions known to the system | 442 | partitions Table of partitions known to the system |
435 | pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, | 443 | pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, |
436 | decoupled by lspci (2.4) | 444 | decoupled by lspci (2.4) |
@@ -585,7 +593,7 @@ Node 0, zone DMA 0 4 5 4 4 3 ... | |||
585 | Node 0, zone Normal 1 0 0 1 101 8 ... | 593 | Node 0, zone Normal 1 0 0 1 101 8 ... |
586 | Node 0, zone HighMem 2 0 0 1 1 0 ... | 594 | Node 0, zone HighMem 2 0 0 1 1 0 ... |
587 | 595 | ||
588 | Memory fragmentation is a problem under some workloads, and buddyinfo is a | 596 | External fragmentation is a problem under some workloads, and buddyinfo is a |
589 | useful tool for helping diagnose these problems. Buddyinfo will give you a | 597 | useful tool for helping diagnose these problems. Buddyinfo will give you a |
590 | clue as to how big an area you can safely allocate, or why a previous | 598 | clue as to how big an area you can safely allocate, or why a previous |
591 | allocation failed. | 599 | allocation failed. |
@@ -595,6 +603,48 @@ available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in | |||
595 | ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE | 603 | ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE |
596 | available in ZONE_NORMAL, etc... | 604 | available in ZONE_NORMAL, etc... |
597 | 605 | ||
606 | More information relevant to external fragmentation can be found in | ||
607 | pagetypeinfo. | ||
608 | |||
609 | > cat /proc/pagetypeinfo | ||
610 | Page block order: 9 | ||
611 | Pages per block: 512 | ||
612 | |||
613 | Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 | ||
614 | Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0 | ||
615 | Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0 | ||
616 | Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2 | ||
617 | Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0 | ||
618 | Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0 | ||
619 | Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9 | ||
620 | Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0 | ||
621 | Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452 | ||
622 | Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0 | ||
623 | Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0 | ||
624 | |||
625 | Number of blocks type Unmovable Reclaimable Movable Reserve Isolate | ||
626 | Node 0, zone DMA 2 0 5 1 0 | ||
627 | Node 0, zone DMA32 41 6 967 2 0 | ||
628 | |||
629 | Fragmentation avoidance in the kernel works by grouping pages of different | ||
630 | migrate types into the same contiguous regions of memory called page blocks. | ||
631 | A page block is typically the size of the default hugepage size e.g. 2MB on | ||
632 | X86-64. By keeping pages grouped based on their ability to move, the kernel | ||
633 | can reclaim pages within a page block to satisfy a high-order allocation. | ||
634 | |||
635 | The pagetypinfo begins with information on the size of a page block. It | ||
636 | then gives the same type of information as buddyinfo except broken down | ||
637 | by migrate-type and finishes with details on how many page blocks of each | ||
638 | type exist. | ||
639 | |||
640 | If min_free_kbytes has been tuned correctly (recommendations made by hugeadm | ||
641 | from libhugetlbfs http://sourceforge.net/projects/libhugetlbfs/), one can | ||
642 | make an estimate of the likely number of huge pages that can be allocated | ||
643 | at a given point in time. All the "Movable" blocks should be allocatable | ||
644 | unless memory has been mlock()'d. Some of the Reclaimable blocks should | ||
645 | also be allocatable although a lot of filesystem metadata may have to be | ||
646 | reclaimed to achieve this. | ||
647 | |||
598 | .............................................................................. | 648 | .............................................................................. |
599 | 649 | ||
600 | meminfo: | 650 | meminfo: |
@@ -1072,7 +1122,8 @@ second). The meanings of the columns are as follows, from left to right: | |||
1072 | - irq: servicing interrupts | 1122 | - irq: servicing interrupts |
1073 | - softirq: servicing softirqs | 1123 | - softirq: servicing softirqs |
1074 | - steal: involuntary wait | 1124 | - steal: involuntary wait |
1075 | - guest: running a guest | 1125 | - guest: running a normal guest |
1126 | - guest_nice: running a niced guest | ||
1076 | 1127 | ||
1077 | The "intr" line gives counts of interrupts serviced since boot time, for each | 1128 | The "intr" line gives counts of interrupts serviced since boot time, for each |
1078 | of the possible system interrupts. The first column is the total of all | 1129 | of the possible system interrupts. The first column is the total of all |
@@ -1088,8 +1139,8 @@ The "processes" line gives the number of processes and threads created, which | |||
1088 | includes (but is not limited to) those created by calls to the fork() and | 1139 | includes (but is not limited to) those created by calls to the fork() and |
1089 | clone() system calls. | 1140 | clone() system calls. |
1090 | 1141 | ||
1091 | The "procs_running" line gives the number of processes currently running on | 1142 | The "procs_running" line gives the total number of threads that are |
1092 | CPUs. | 1143 | running or ready to run (i.e., the total number of runnable threads). |
1093 | 1144 | ||
1094 | The "procs_blocked" line gives the number of processes currently blocked, | 1145 | The "procs_blocked" line gives the number of processes currently blocked, |
1095 | waiting for I/O to complete. | 1146 | waiting for I/O to complete. |
@@ -1113,7 +1164,6 @@ Table 1-12: Files in /proc/fs/ext4/<devname> | |||
1113 | .............................................................................. | 1164 | .............................................................................. |
1114 | File Content | 1165 | File Content |
1115 | mb_groups details of multiblock allocator buddy cache of free blocks | 1166 | mb_groups details of multiblock allocator buddy cache of free blocks |
1116 | mb_history multiblock allocation history | ||
1117 | .............................................................................. | 1167 | .............................................................................. |
1118 | 1168 | ||
1119 | 1169 | ||
@@ -1409,3 +1459,11 @@ For more information on mount propagation see: | |||
1409 | 1459 | ||
1410 | Documentation/filesystems/sharedsubtree.txt | 1460 | Documentation/filesystems/sharedsubtree.txt |
1411 | 1461 | ||
1462 | |||
1463 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm | ||
1464 | -------------------------------------------------------- | ||
1465 | These files provide a method to access a tasks comm value. It also allows for | ||
1466 | a task to set its own or one of its thread siblings comm value. The comm value | ||
1467 | is limited in size compared to the cmdline value, so writing anything longer | ||
1468 | then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated | ||
1469 | comm value. | ||
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt index 0d15ebccf5b0..a1e2e0dda907 100644 --- a/Documentation/filesystems/seq_file.txt +++ b/Documentation/filesystems/seq_file.txt | |||
@@ -248,9 +248,7 @@ code, that is done in the initialization code in the usual way: | |||
248 | { | 248 | { |
249 | struct proc_dir_entry *entry; | 249 | struct proc_dir_entry *entry; |
250 | 250 | ||
251 | entry = create_proc_entry("sequence", 0, NULL); | 251 | proc_create("sequence", 0, NULL, &ct_file_ops); |
252 | if (entry) | ||
253 | entry->proc_fops = &ct_file_ops; | ||
254 | return 0; | 252 | return 0; |
255 | } | 253 | } |
256 | 254 | ||
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 23a181074f94..fc0e39af43c3 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
@@ -837,6 +837,9 @@ replicas continue to be exactly same. | |||
837 | individual lists does not affect propagation or the way propagation | 837 | individual lists does not affect propagation or the way propagation |
838 | tree is modified by operations. | 838 | tree is modified by operations. |
839 | 839 | ||
840 | All vfsmounts in a peer group have the same ->mnt_master. If it is | ||
841 | non-NULL, they form a contiguous (ordered) segment of slave list. | ||
842 | |||
840 | A example propagation tree looks as shown in the figure below. | 843 | A example propagation tree looks as shown in the figure below. |
841 | [ NOTE: Though it looks like a forest, if we consider all the shared | 844 | [ NOTE: Though it looks like a forest, if we consider all the shared |
842 | mounts as a conceptual entity called 'pnode', it becomes a tree] | 845 | mounts as a conceptual entity called 'pnode', it becomes a tree] |
@@ -874,8 +877,19 @@ replicas continue to be exactly same. | |||
874 | 877 | ||
875 | NOTE: The propagation tree is orthogonal to the mount tree. | 878 | NOTE: The propagation tree is orthogonal to the mount tree. |
876 | 879 | ||
880 | 8B Locking: | ||
881 | |||
882 | ->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected | ||
883 | by namespace_sem (exclusive for modifications, shared for reading). | ||
884 | |||
885 | Normally we have ->mnt_flags modifications serialized by vfsmount_lock. | ||
886 | There are two exceptions: do_add_mount() and clone_mnt(). | ||
887 | The former modifies a vfsmount that has not been visible in any shared | ||
888 | data structures yet. | ||
889 | The latter holds namespace_sem and the only references to vfsmount | ||
890 | are in lists that can't be traversed without namespace_sem. | ||
877 | 891 | ||
878 | 8B Algorithm: | 892 | 8C Algorithm: |
879 | 893 | ||
880 | The crux of the implementation resides in rbind/move operation. | 894 | The crux of the implementation resides in rbind/move operation. |
881 | 895 | ||
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index b245d524d568..931c806642c5 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -91,8 +91,8 @@ struct device_attribute { | |||
91 | const char *buf, size_t count); | 91 | const char *buf, size_t count); |
92 | }; | 92 | }; |
93 | 93 | ||
94 | int device_create_file(struct device *, struct device_attribute *); | 94 | int device_create_file(struct device *, const struct device_attribute *); |
95 | void device_remove_file(struct device *, struct device_attribute *); | 95 | void device_remove_file(struct device *, const struct device_attribute *); |
96 | 96 | ||
97 | It also defines this helper for defining device attributes: | 97 | It also defines this helper for defining device attributes: |
98 | 98 | ||
@@ -316,8 +316,8 @@ DEVICE_ATTR(_name, _mode, _show, _store); | |||
316 | 316 | ||
317 | Creation/Removal: | 317 | Creation/Removal: |
318 | 318 | ||
319 | int device_create_file(struct device *device, struct device_attribute * attr); | 319 | int device_create_file(struct device *dev, const struct device_attribute * attr); |
320 | void device_remove_file(struct device * dev, struct device_attribute * attr); | 320 | void device_remove_file(struct device *dev, const struct device_attribute * attr); |
321 | 321 | ||
322 | 322 | ||
323 | - bus drivers (include/linux/device.h) | 323 | - bus drivers (include/linux/device.h) |
@@ -358,7 +358,7 @@ DRIVER_ATTR(_name, _mode, _show, _store) | |||
358 | 358 | ||
359 | Creation/Removal: | 359 | Creation/Removal: |
360 | 360 | ||
361 | int driver_create_file(struct device_driver *, struct driver_attribute *); | 361 | int driver_create_file(struct device_driver *, const struct driver_attribute *); |
362 | void driver_remove_file(struct device_driver *, struct driver_attribute *); | 362 | void driver_remove_file(struct device_driver *, const struct driver_attribute *); |
363 | 363 | ||
364 | 364 | ||
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index b58b84b50fa2..eed520fd0c8e 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -102,7 +102,7 @@ shortname=lower|win95|winnt|mixed | |||
102 | winnt: emulate the Windows NT rule for display/create. | 102 | winnt: emulate the Windows NT rule for display/create. |
103 | mixed: emulate the Windows NT rule for display, | 103 | mixed: emulate the Windows NT rule for display, |
104 | emulate the Windows 95 rule for create. | 104 | emulate the Windows 95 rule for create. |
105 | Default setting is `lower'. | 105 | Default setting is `mixed'. |
106 | 106 | ||
107 | tz=UTC -- Interpret timestamps as UTC rather than local time. | 107 | tz=UTC -- Interpret timestamps as UTC rather than local time. |
108 | This option disables the conversion of timestamps | 108 | This option disables the conversion of timestamps |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 623f094c9d8d..3de2f32edd90 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -472,7 +472,7 @@ __sync_single_inode) to check if ->writepages has been successful in | |||
472 | writing out the whole address_space. | 472 | writing out the whole address_space. |
473 | 473 | ||
474 | The Writeback tag is used by filemap*wait* and sync_page* functions, | 474 | The Writeback tag is used by filemap*wait* and sync_page* functions, |
475 | via wait_on_page_writeback_range, to wait for all writeback to | 475 | via filemap_fdatawait_range, to wait for all writeback to |
476 | complete. While waiting ->sync_page (if defined) will be called on | 476 | complete. While waiting ->sync_page (if defined) will be called on |
477 | each page that is found to require writeback. | 477 | each page that is found to require writeback. |
478 | 478 | ||