diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 31 | ||||
-rw-r--r-- | Documentation/filesystems/ext4.txt | 14 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/00-INDEX | 4 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/idmapper.txt | 67 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfsroot.txt | 22 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/pnfs.txt | 48 | ||||
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 25 | ||||
-rw-r--r-- | Documentation/filesystems/sharedsubtree.txt | 4 |
9 files changed, 205 insertions, 17 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2db4283efa8d..8a817f656f0a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -349,21 +349,36 @@ call this method upon the IO completion. | |||
349 | 349 | ||
350 | --------------------------- block_device_operations ----------------------- | 350 | --------------------------- block_device_operations ----------------------- |
351 | prototypes: | 351 | prototypes: |
352 | int (*open) (struct inode *, struct file *); | 352 | int (*open) (struct block_device *, fmode_t); |
353 | int (*release) (struct inode *, struct file *); | 353 | int (*release) (struct gendisk *, fmode_t); |
354 | int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); | 354 | int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
355 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | ||
356 | int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); | ||
355 | int (*media_changed) (struct gendisk *); | 357 | int (*media_changed) (struct gendisk *); |
358 | void (*unlock_native_capacity) (struct gendisk *); | ||
356 | int (*revalidate_disk) (struct gendisk *); | 359 | int (*revalidate_disk) (struct gendisk *); |
360 | int (*getgeo)(struct block_device *, struct hd_geometry *); | ||
361 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | ||
357 | 362 | ||
358 | locking rules: | 363 | locking rules: |
359 | BKL bd_sem | 364 | BKL bd_mutex |
360 | open: yes yes | 365 | open: no yes |
361 | release: yes yes | 366 | release: no yes |
362 | ioctl: yes no | 367 | ioctl: no no |
368 | compat_ioctl: no no | ||
369 | direct_access: no no | ||
363 | media_changed: no no | 370 | media_changed: no no |
371 | unlock_native_capacity: no no | ||
364 | revalidate_disk: no no | 372 | revalidate_disk: no no |
373 | getgeo: no no | ||
374 | swap_slot_free_notify: no no (see below) | ||
375 | |||
376 | media_changed, unlock_native_capacity and revalidate_disk are called only from | ||
377 | check_disk_change(). | ||
378 | |||
379 | swap_slot_free_notify is called with swap_lock and sometimes the page lock | ||
380 | held. | ||
365 | 381 | ||
366 | The last two are called only from check_disk_change(). | ||
367 | 382 | ||
368 | --------------------------- file_operations ------------------------------- | 383 | --------------------------- file_operations ------------------------------- |
369 | prototypes: | 384 | prototypes: |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index e1def1786e50..6ab9442d7eeb 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as | |||
353 | system crashes before the delayed allocation | 353 | system crashes before the delayed allocation |
354 | blocks are forced to disk. | 354 | blocks are forced to disk. |
355 | 355 | ||
356 | noinit_itable Do not initialize any uninitialized inode table | ||
357 | blocks in the background. This feature may be | ||
358 | used by installation CD's so that the install | ||
359 | process can complete as quickly as possible; the | ||
360 | inode table initialization process would then be | ||
361 | deferred until the next time the file system | ||
362 | is unmounted. | ||
363 | |||
364 | init_itable=n The lazy itable init code will wait n times the | ||
365 | number of milliseconds it took to zero out the | ||
366 | previous block group's inode table. This | ||
367 | minimizes the impact on the system performance | |
368 | while the file system's inode table is being initialized. | |
369 | |||
356 | discard Controls whether ext4 should issue discard/TRIM | 370 | discard Controls whether ext4 should issue discard/TRIM |
357 | nodiscard(*) commands to the underlying block device when | 371 | nodiscard(*) commands to the underlying block device when |
358 | blocks are freed. This is useful for SSD devices | 372 | blocks are freed. This is useful for SSD devices |
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 2f68cd688769..a57e12411d2a 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX | |||
@@ -12,5 +12,9 @@ nfs-rdma.txt | |||
12 | - how to install and setup the Linux NFS/RDMA client and server software | 12 | - how to install and setup the Linux NFS/RDMA client and server software |
13 | nfsroot.txt | 13 | nfsroot.txt |
14 | - short guide on setting up a diskless box with NFS root filesystem. | 14 | - short guide on setting up a diskless box with NFS root filesystem. |
15 | pnfs.txt | ||
16 | - short explanation of some of the internals of the pnfs client code | ||
15 | rpc-cache.txt | 17 | rpc-cache.txt |
16 | - introduction to the caching mechanisms in the sunrpc layer. | 18 | - introduction to the caching mechanisms in the sunrpc layer. |
19 | idmapper.txt | ||
20 | - information for configuring request-keys to be used by idmapper | ||
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt new file mode 100644 index 000000000000..b9b4192ea8b5 --- /dev/null +++ b/Documentation/filesystems/nfs/idmapper.txt | |||
@@ -0,0 +1,67 @@ | |||
1 | |||
2 | ========= | ||
3 | ID Mapper | ||
4 | ========= | ||
5 | Id mapper is used by NFS to translate user and group ids into names, and to | ||
6 | translate user and group names into ids. Part of this translation involves | ||
7 | performing an upcall to userspace to request the information. Id mapper will | ||
8 | use request-key to perform this upcall and cache the result. The program | |
9 | /usr/sbin/nfs.idmap should be called by request-key, and will perform the | ||
10 | translation and initialize a key with the resulting information. | ||
11 | |||
12 | NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this | ||
13 | feature. | ||
14 | |||
15 | =========== | ||
16 | Configuring | ||
17 | =========== | ||
18 | The file /etc/request-key.conf will need to be modified so /sbin/request-key can | ||
19 | direct the upcall. The following line should be added: | ||
20 | |||
21 | #OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... | ||
22 | #====== ======= =============== =============== =============================== | ||
23 | create id_resolver * * /usr/sbin/nfs.idmap %k %d 600 | ||
24 | |||
25 | This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap. | ||
26 | The last parameter, 600, defines how many seconds into the future the key will | ||
27 | expire. This parameter is optional for /usr/sbin/nfs.idmap. When the timeout | ||
28 | is not specified, nfs.idmap will default to 600 seconds. | ||
29 | |||
30 | id mapper uses four key descriptions: | |
31 | uid: Find the UID for the given user | ||
32 | gid: Find the GID for the given group | ||
33 | user: Find the user name for the given UID | ||
34 | group: Find the group name for the given GID | ||
35 | |||
36 | You can handle any of these individually, rather than using the generic upcall | ||
37 | program. If you would like to use your own program for a uid lookup then you | ||
38 | would edit your request-key.conf so it looks similar to this: | |
39 | |||
40 | #OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... | ||
41 | #====== ======= =============== =============== =============================== | ||
42 | create id_resolver uid:* * /some/other/program %k %d 600 | ||
43 | create id_resolver * * /usr/sbin/nfs.idmap %k %d 600 | ||
44 | |||
45 | Notice that the new line was added above the line for the generic program. | ||
46 | request-key will find the first matching line and corresponding program. In | ||
47 | this case, /some/other/program will handle all uid lookups and | ||
48 | /usr/sbin/nfs.idmap will handle gid, user, and group lookups. | ||
49 | |||
50 | See <file:Documentation/keys-request-key.txt> for more information about the | |
51 | request-key function. | ||
52 | |||
53 | |||
54 | ========= | ||
55 | nfs.idmap | ||
56 | ========= | ||
57 | nfs.idmap is designed to be called by request-key, and should not be run "by | ||
58 | hand". This program takes two arguments, a serialized key and a key | ||
59 | description. The serialized key is first converted into a key_serial_t, and | ||
60 | then passed as an argument to keyctl_instantiate (both are part of keyutils.h). | ||
61 | |||
62 | The actual lookups are performed by functions found in nfsidmap.h. nfs.idmap | ||
63 | determines the correct function to call by looking at the first part of the | ||
64 | description string. For example, a uid lookup description will appear as | ||
65 | "uid:user@domain". | ||
66 | |||
67 | nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise. | ||
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index f2430a7974e1..90c71c6f0d00 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt | |||
@@ -159,6 +159,28 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> | |||
159 | Default: any | 159 | Default: any |
160 | 160 | ||
161 | 161 | ||
162 | nfsrootdebug | ||
163 | |||
164 | This parameter enables debugging messages to appear in the kernel | ||
165 | log at boot time so that administrators can verify that the correct | ||
166 | NFS mount options, server address, and root path are passed to the | ||
167 | NFS client. | ||
168 | |||
169 | |||
170 | rdinit=<executable file> | ||
171 | |||
172 | To specify which file contains the program that starts system | ||
173 | initialization, administrators can use this command line parameter. | ||
174 | The default value of this parameter is "/init". If the specified | ||
175 | file exists and the kernel can execute it, root filesystem related | ||
176 | kernel command line parameters, including `nfsroot=', are ignored. | ||
177 | |||
178 | A description of the process of mounting the root file system can be | ||
179 | found in: | ||
180 | |||
181 | Documentation/early-userspace/README | ||
182 | |||
183 | |||
162 | 184 | ||
163 | 185 | ||
164 | 3.) Boot Loader | 186 | 3.) Boot Loader |
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt new file mode 100644 index 000000000000..bc0b9cfe095b --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.txt | |||
@@ -0,0 +1,48 @@ | |||
1 | Reference counting in pnfs: | ||
2 | ========================== | ||
3 | |||
4 | There are several inter-related caches. We have layouts which can | |
5 | reference multiple devices, each of which can reference multiple data servers. | ||
6 | Each data server can be referenced by multiple devices. Each device | ||
7 | can be referenced by multiple layouts. To keep all of this straight, | ||
8 | we need to reference count. | ||
9 | |||
10 | |||
11 | struct pnfs_layout_hdr | ||
12 | ---------------------- | ||
13 | The on-the-wire command LAYOUTGET corresponds to struct | ||
14 | pnfs_layout_segment, usually referred to by the variable name lseg. | ||
15 | Each nfs_inode may hold a pointer to a cache of these layout | |
16 | segments in nfsi->layout, of type struct pnfs_layout_hdr. | ||
17 | |||
18 | We reference the header for the inode pointing to it, across each | ||
19 | outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, | ||
20 | LAYOUTCOMMIT), and for each lseg held within. | ||
21 | |||
22 | Each header is also (when non-empty) put on a list associated with | ||
23 | struct nfs_client (cl_layouts). Being put on this list does not bump | ||
24 | the reference count, as the layout is kept around by the lseg that | ||
25 | keeps it in the list. | ||
26 | |||
27 | deviceid_cache | ||
28 | -------------- | ||
29 | lsegs reference device ids, which are resolved per nfs_client and | ||
30 | layout driver type. The device ids are held in an RCU cache (struct | |
31 | nfs4_deviceid_cache). The cache itself is referenced across each | ||
32 | mount. The entries (struct nfs4_deviceid) themselves are held across | ||
33 | the lifetime of each lseg referencing them. | ||
34 | |||
35 | RCU is used because the deviceid is basically a write once, read many | ||
36 | data structure. The hlist size of 32 buckets needs better | ||
37 | justification, but seems reasonable given that we can have multiple | ||
38 | deviceid's per filesystem, and multiple filesystems per nfs_client. | ||
39 | |||
40 | The hash code is copied from the nfsd code base. A discussion of | ||
41 | hashing and variations of this algorithm can be found at: | ||
42 | http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 | ||
43 | |||
44 | data server cache | ||
45 | ----------------- | ||
46 | file driver devices refer to data servers, which are kept in a module | ||
47 | level cache. Its reference is held over the lifetime of the deviceid | ||
48 | pointing to it. | ||
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 1f7ae144f6d8..5393e6611691 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file | |||
87 | reservations - users should rarely need to change this | 87 | reservations - users should rarely need to change this |
88 | value. If allocation reservations are turned off, this | 88 | value. If allocation reservations are turned off, this |
89 | option will have no effect. | 89 | option will have no effect. |
90 | coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode | ||
91 | lock will be taken to force other nodes to drop cache, | |
92 | therefore full cluster coherency is guaranteed even | ||
93 | for O_DIRECT writes. | ||
94 | coherency=buffered Allow concurrent O_DIRECT writes without EX lock among | ||
95 | nodes, which gains high performance at risk of getting | ||
96 | stale data on other nodes. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a6aca8740883..e73df2722ff3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -136,6 +136,7 @@ Table 1-1: Process specific entries in /proc | |||
136 | statm Process memory status information | 136 | statm Process memory status information |
137 | status Process status in human readable form | 137 | status Process status in human readable form |
138 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan | 138 | wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan |
139 | pagemap Page table | ||
139 | stack Report full stack trace, enable via CONFIG_STACKTRACE | 140 | stack Report full stack trace, enable via CONFIG_STACKTRACE |
140 | smaps an extension based on maps, showing the memory consumption of | 141 | smaps an extension based on maps, showing the memory consumption of |
141 | each mapping | 142 | each mapping |
@@ -370,17 +371,24 @@ Shared_Dirty: 0 kB | |||
370 | Private_Clean: 0 kB | 371 | Private_Clean: 0 kB |
371 | Private_Dirty: 0 kB | 372 | Private_Dirty: 0 kB |
372 | Referenced: 892 kB | 373 | Referenced: 892 kB |
374 | Anonymous: 0 kB | ||
373 | Swap: 0 kB | 375 | Swap: 0 kB |
374 | KernelPageSize: 4 kB | 376 | KernelPageSize: 4 kB |
375 | MMUPageSize: 4 kB | 377 | MMUPageSize: 4 kB |
376 | 378 | ||
377 | The first of these lines shows the same information as is displayed for the | 379 | The first of these lines shows the same information as is displayed for the |
378 | mapping in /proc/PID/maps. The remaining lines show the size of the mapping, | 380 | mapping in /proc/PID/maps. The remaining lines show the size of the mapping |
379 | the amount of the mapping that is currently resident in RAM, the "proportional | 381 | (size), the amount of the mapping that is currently resident in RAM (RSS), the |
380 | set size” (divide each shared page by the number of processes sharing it), the | 382 | process' proportional share of this mapping (PSS), the number of clean and |
381 | number of clean and dirty shared pages in the mapping, and the number of clean | 383 | dirty private pages in the mapping. Note that even a page which is part of a |
382 | and dirty private pages in the mapping. The "Referenced" indicates the amount | 384 | MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used |
383 | of memory currently marked as referenced or accessed. | 385 | by only one process, is accounted as private and not as shared. "Referenced" |
386 | indicates the amount of memory currently marked as referenced or accessed. | ||
387 | "Anonymous" shows the amount of memory that does not belong to any file. Even | ||
388 | a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE | ||
389 | and a page is modified, the file page is replaced by a private anonymous copy. | ||
390 | "Swap" shows how much would-be-anonymous memory is also used, but out on | ||
391 | swap. | ||
384 | 392 | ||
385 | This file is only present if the CONFIG_MMU kernel configuration option is | 393 | This file is only present if the CONFIG_MMU kernel configuration option is |
386 | enabled. | 394 | enabled. |
@@ -397,6 +405,9 @@ To clear the bits for the file mapped pages associated with the process | |||
397 | > echo 3 > /proc/PID/clear_refs | 405 | > echo 3 > /proc/PID/clear_refs |
398 | Any other value written to /proc/PID/clear_refs will have no effect. | 406 | Any other value written to /proc/PID/clear_refs will have no effect. |
399 | 407 | ||
408 | The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags | ||
409 | using /proc/kpageflags and number of times a page is mapped using | ||
410 | /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. | ||
400 | 411 | ||
401 | 1.2 Kernel data | 412 | 1.2 Kernel data |
402 | --------------- | 413 | --------------- |
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index fc0e39af43c3..4ede421c9687 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
@@ -62,10 +62,10 @@ replicas continue to be exactly same. | |||
62 | # mount /dev/sd0 /tmp/a | 62 | # mount /dev/sd0 /tmp/a |
63 | 63 | ||
64 | #ls /tmp/a | 64 | #ls /tmp/a |
65 | t1 t2 t2 | 65 | t1 t2 t3 |
66 | 66 | ||
67 | #ls /mnt/a | 67 | #ls /mnt/a |
68 | t1 t2 t2 | 68 | t1 t2 t3 |
69 | 69 | ||
70 | Note that the mount has propagated to the mount at /mnt as well. | 70 | Note that the mount has propagated to the mount at /mnt as well. |
71 | 71 | ||