aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/9p.txt40
-rw-r--r--Documentation/filesystems/ext4.txt37
-rw-r--r--Documentation/filesystems/gfs2-uevents.txt100
-rw-r--r--Documentation/filesystems/ncpfs.txt6
-rw-r--r--Documentation/filesystems/nfs.txt98
-rw-r--r--Documentation/filesystems/nfs41-server.txt54
-rw-r--r--Documentation/filesystems/nfsroot.txt2
-rw-r--r--Documentation/filesystems/proc.txt28
-rw-r--r--Documentation/filesystems/seq_file.txt2
-rw-r--r--Documentation/filesystems/sharedsubtree.txt220
-rw-r--r--Documentation/filesystems/vfat.txt2
-rw-r--r--Documentation/filesystems/vfs.txt7
12 files changed, 378 insertions, 218 deletions
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index 6208f55c44c3..57e0b80a5274 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper:
18 18
19Other applications are described in the following papers: 19Other applications are described in the following papers:
20 * XCPU & Clustering 20 * XCPU & Clustering
21 http://www.xcpu.org/xcpu-talk.pdf 21 http://xcpu.org/papers/xcpu-talk.pdf
22 * KVMFS: control file system for KVM 22 * KVMFS: control file system for KVM
23 http://www.xcpu.org/kvmfs.pdf 23 http://xcpu.org/papers/kvmfs.pdf
24 * CellFS: A New ProgrammingModel for the Cell BE 24 * CellFS: A New Programming Model for the Cell BE
25 http://www.xcpu.org/cellfs-talk.pdf 25 http://xcpu.org/papers/cellfs-talk.pdf
26 * PROSE I/O: Using 9p to enable Application Partitions 26 * PROSE I/O: Using 9p to enable Application Partitions
27 http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf 27 http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
28 28
@@ -48,6 +48,7 @@ OPTIONS
48 (see rfdno and wfdno) 48 (see rfdno and wfdno)
49 virtio - connect to the next virtio channel available 49 virtio - connect to the next virtio channel available
50 (from lguest or KVM with trans_virtio module) 50 (from lguest or KVM with trans_virtio module)
51 rdma - connect to a specified RDMA channel
51 52
52 uname=name user name to attempt mount as on the remote server. The 53 uname=name user name to attempt mount as on the remote server. The
53 server may override or ignore this value. Certain user 54 server may override or ignore this value. Certain user
@@ -59,16 +60,22 @@ OPTIONS
59 cache=mode specifies a caching policy. By default, no caches are used. 60 cache=mode specifies a caching policy. By default, no caches are used.
60 loose = no attempts are made at consistency, 61 loose = no attempts are made at consistency,
61 intended for exclusive, read-only mounts 62 intended for exclusive, read-only mounts
63 fscache = use FS-Cache for a persistent, read-only
64 cache backend.
62 65
63 debug=n specifies debug level. The debug level is a bitmask. 66 debug=n specifies debug level. The debug level is a bitmask.
64 0x01 = display verbose error messages 67 0x01 = display verbose error messages
65 0x02 = developer debug (DEBUG_CURRENT) 68 0x02 = developer debug (DEBUG_CURRENT)
66 0x04 = display 9p trace 69 0x04 = display 9p trace
67 0x08 = display VFS trace 70 0x08 = display VFS trace
68 0x10 = display Marshalling debug 71 0x10 = display Marshalling debug
69 0x20 = display RPC debug 72 0x20 = display RPC debug
70 0x40 = display transport debug 73 0x40 = display transport debug
71 0x80 = display allocation debug 74 0x80 = display allocation debug
75 0x100 = display protocol message debug
76 0x200 = display Fid debug
77 0x400 = display packet debug
78 0x800 = display fscache tracing debug
72 79
73 rfdno=n the file descriptor for reading with trans=fd 80 rfdno=n the file descriptor for reading with trans=fd
74 81
@@ -100,6 +107,10 @@ OPTIONS
100 any = v9fs does single attach and performs all 107 any = v9fs does single attach and performs all
101 operations as one user 108 operations as one user
102 109
110 cachetag cache tag to use the specified persistent cache.
111 cache tags for existing cache sessions can be listed at
112 /sys/fs/9p/caches. (applies only to cache=fscache)
113
103RESOURCES 114RESOURCES
104========= 115=========
105 116
@@ -118,7 +129,7 @@ and export.
118A Linux version of the 9p server is now maintained under the npfs project 129A Linux version of the 9p server is now maintained under the npfs project
119on sourceforge (http://sourceforge.net/projects/npfs). The currently 130on sourceforge (http://sourceforge.net/projects/npfs). The currently
120maintained version is the single-threaded version of the server (named spfs) 131maintained version is the single-threaded version of the server (named spfs)
121available from the same CVS repository. 132available from the same SVN repository.
122 133
123There are user and developer mailing lists available through the v9fs project 134There are user and developer mailing lists available through the v9fs project
124on sourceforge (http://sourceforge.net/projects/v9fs). 135on sourceforge (http://sourceforge.net/projects/v9fs).
@@ -126,7 +137,8 @@ on sourceforge (http://sourceforge.net/projects/v9fs).
126A stand-alone version of the module (which should build for any 2.6 kernel) 137A stand-alone version of the module (which should build for any 2.6 kernel)
127is available via (http://github.com/ericvh/9p-sac/tree/master) 138is available via (http://github.com/ericvh/9p-sac/tree/master)
128 139
129News and other information is maintained on SWiK (http://swik.net/v9fs). 140News and other information is maintained on SWiK (http://swik.net/v9fs)
141and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).
130 142
131Bug reports may be issued through the kernel.org bugzilla 143Bug reports may be issued through the kernel.org bugzilla
132(http://bugzilla.kernel.org) 144(http://bugzilla.kernel.org)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 7be02ac5fa36..bf4f4b7e11b3 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
134 mount options "ro,noload" can be used to prevent 134 mount options "ro,noload" can be used to prevent
135 writes to the filesystem. 135 writes to the filesystem.
136 136
137journal_checksum Enable checksumming of the journal transactions.
138 This will allow the recovery code in e2fsck and the
139 kernel to detect corruption in the kernel. It is a
140 compatible change and will be ignored by older kernels.
141
142journal_async_commit Commit block can be written to disk without waiting 137journal_async_commit Commit block can be written to disk without waiting
143 for descriptor blocks. If enabled older kernels cannot 138 for descriptor blocks. If enabled older kernels cannot
144 mount the device. This will enable 'journal_checksum' 139 mount the device.
145 internally.
146 140
147journal=update Update the ext4 file system's journal to the current 141journal=update Update the ext4 file system's journal to the current
148 format. 142 format.
@@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.
263 257
264sb=n Use alternate superblock at this location. 258sb=n Use alternate superblock at this location.
265 259
266quota 260quota These options are ignored by the filesystem. They
267noquota 261noquota are used only by quota tools to recognize volumes
268grpquota 262grpquota where quota should be turned on. See documentation
269usrquota 263usrquota in the quota-tools package for more details
264 (http://sourceforge.net/projects/linuxquota).
265
266jqfmt=<quota type> These options tell filesystem details about quota
267usrjquota=<file> so that quota information can be properly updated
268grpjquota=<file> during journal replay. They replace the above
269 quota options. See documentation in the quota-tools
270 package for more details
271 (http://sourceforge.net/projects/linuxquota).
270 272
271bh (*) ext4 associates buffer heads to data pages to 273bh (*) ext4 associates buffer heads to data pages to
272nobh (a) cache disk block mapping information 274nobh (a) cache disk block mapping information
@@ -280,9 +282,16 @@ stripe=n Number of filesystem blocks that mballoc will try
280 to use for allocation size and alignment. For RAID5/6 282 to use for allocation size and alignment. For RAID5/6
281 systems this should be the number of data 283 systems this should be the number of data
282 disks * RAID chunk size in file system blocks. 284 disks * RAID chunk size in file system blocks.
283delalloc (*) Deferring block allocation until write-out time. 285
284nodelalloc Disable delayed allocation. Blocks are allocation 286delalloc (*) Defer block allocation until just before ext4
285 when data is copied from user to page cache. 287 writes out the block(s) in question. This
288 allows ext4 to better allocation decisions
289 more efficiently.
290nodelalloc Disable delayed allocation. Blocks are allocated
291 when the data is copied from userspace to the
292 page cache, either via the write(2) system call
293 or when an mmap'ed page which was previously
294 unallocated is written for the first time.
286 295
287max_batch_time=usec Maximum amount of time ext4 should wait for 296max_batch_time=usec Maximum amount of time ext4 should wait for
288 additional filesystem operations to be batch 297 additional filesystem operations to be batch
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt
new file mode 100644
index 000000000000..fd966dc9979a
--- /dev/null
+++ b/Documentation/filesystems/gfs2-uevents.txt
@@ -0,0 +1,100 @@
1 uevents and GFS2
2 ==================
3
4During the lifetime of a GFS2 mount, a number of uevents are generated.
5This document explains what the events are and what they are used
6for (by gfs_controld in gfs2-utils).
7
8A list of GFS2 uevents
9-----------------------
10
111. ADD
12
13The ADD event occurs at mount time. It will always be the first
14uevent generated by the newly created filesystem. If the mount
15is successful, an ONLINE uevent will follow. If it is not successful
16then a REMOVE uevent will follow.
17
18The ADD uevent has two environment variables: SPECTATOR=[0|1]
19and RDONLY=[0|1] that specify the spectator status (a read-only mount
20with no journal assigned), and read-only (with journal assigned) status
21of the filesystem respectively.
22
232. ONLINE
24
25The ONLINE uevent is generated after a successful mount or remount. It
26has the same environment variables as the ADD uevent. The ONLINE
27uevent, along with the two environment variables for spectator and
28RDONLY are a relatively recent addition (2.6.32-rc+) and will not
29be generated by older kernels.
30
313. CHANGE
32
33The CHANGE uevent is used in two places. One is when reporting the
34successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
35This is used as a signal by gfs_controld that it is then ok for other
36nodes in the cluster to mount the filesystem.
37
38The other CHANGE uevent is used to inform of the completion
39of journal recovery for one of the filesystems journals. It has
40two environment variables, JID= which specifies the journal id which
41has just been recovered, and RECOVERY=[Done|Failed] to indicate the
42success (or otherwise) of the operation. These uevents are generated
43for every journal recovered, whether it is during the initial mount
44process or as the result of gfs_controld requesting a specific journal
45recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file.
46
47Because the CHANGE uevent was used (in early versions of gfs_controld)
48without checking the environment variables to discover the state, we
49cannot add any more functions to it without running the risk of
50someone using an older version of the user tools and breaking their
51cluster. For this reason the ONLINE uevent was used when adding a new
52uevent for a successful mount or remount.
53
544. OFFLINE
55
56The OFFLINE uevent is only generated due to filesystem errors and is used
57as part of the "withdraw" mechanism. Currently this doesn't give any
58information about what the error is, which is something that needs to
59be fixed.
60
615. REMOVE
62
63The REMOVE uevent is generated at the end of an unsuccessful mount
64or at the end of a umount of the filesystem. All REMOVE uevents will
65have been preceeded by at least an ADD uevent for the same fileystem,
66and unlike the other uevents is generated automatically by the kernel's
67kobject subsystem.
68
69
70Information common to all GFS2 uevents (uevent environment variables)
71----------------------------------------------------------------------
72
731. LOCKTABLE=
74
75The LOCKTABLE is a string, as supplied on the mount command
76line (locktable=) or via fstab. It is used as a filesystem label
77as well as providing the information for a lock_dlm mount to be
78able to join the cluster.
79
802. LOCKPROTO=
81
82The LOCKPROTO is a string, and its value depends on what is set
83on the mount command line, or via fstab. It will be either
84lock_nolock or lock_dlm. In the future other lock managers
85may be supported.
86
873. JOURNALID=
88
89If a journal is in use by the filesystem (journals are not
90assigned for spectator mounts) then this will give the
91numeric journal id in all GFS2 uevents.
92
934. UUID=
94
95With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
96into the filesystem superblock. If it exists, this will
97be included in every uevent relating to the filesystem.
98
99
100
diff --git a/Documentation/filesystems/ncpfs.txt b/Documentation/filesystems/ncpfs.txt
index f12c30c93f2f..5af164f4b37b 100644
--- a/Documentation/filesystems/ncpfs.txt
+++ b/Documentation/filesystems/ncpfs.txt
@@ -7,6 +7,6 @@ ftp.gwdg.de/pub/linux/misc/ncpfs, but sunsite and its many mirrors
7will have it as well. 7will have it as well.
8 8
9Related products are linware and mars_nwe, which will give Linux partial 9Related products are linware and mars_nwe, which will give Linux partial
10NetWare server functionality. Linware's home site is 10NetWare server functionality.
11klokan.sh.cvut.cz/pub/linux/linware; mars_nwe can be found on 11
12ftp.gwdg.de/pub/linux/misc/ncpfs. 12mars_nwe can be found on ftp.gwdg.de/pub/linux/misc/ncpfs.
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs.txt
new file mode 100644
index 000000000000..f50f26ce6cd0
--- /dev/null
+++ b/Documentation/filesystems/nfs.txt
@@ -0,0 +1,98 @@
1
2The NFS client
3==============
4
5The NFS version 2 protocol was first documented in RFC1094 (March 1989).
6Since then two more major releases of NFS have been published, with NFSv3
7being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
82003).
9
10The Linux NFS client currently supports all the above published versions,
11and work is in progress on adding support for minor version 1 of the NFSv4
12protocol.
13
14The purpose of this document is to provide information on some of the
15upcall interfaces that are used in order to provide the NFS client with
16some of the information that it requires in order to fully comply with
17the NFS spec.
18
19The DNS resolver
20================
21
22NFSv4 allows for one server to refer the NFS client to data that has been
23migrated onto another server by means of the special "fs_locations"
24attribute. See
25 http://tools.ietf.org/html/rfc3530#section-6
26and
27 http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
28
29The fs_locations information can take the form of either an ip address and
30a path, or a DNS hostname and a path. The latter requires the NFS client to
31do a DNS lookup in order to mount the new volume, and hence the need for an
32upcall to allow userland to provide this service.
33
34Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
35/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
36
37 (1) The process checks the dns_resolve cache to see if it contains a
38 valid entry. If so, it returns that entry and exits.
39
40 (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
41 (may be changed using the 'nfs.cache_getent' kernel boot parameter)
42 is run, with two arguments:
43 - the cache name, "dns_resolve"
44 - the hostname to resolve
45
46 (3) After looking up the corresponding ip address, the helper script
47 writes the result into the rpc_pipefs pseudo-file
48 '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
49 in the following (text) format:
50
51 "<ip address> <hostname> <ttl>\n"
52
53 Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
54 (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
55 <hostname> is identical to the second argument of the helper
56 script, and <ttl> is the 'time to live' of this cache entry (in
57 units of seconds).
58
59 Note: If <ip address> is invalid, say the string "0", then a negative
60 entry is created, which will cause the kernel to treat the hostname
61 as having no valid DNS translation.
62
63
64
65
66A basic sample /sbin/nfs_cache_getent
67=====================================
68
69#!/bin/bash
70#
71ttl=600
72#
73cut=/usr/bin/cut
74getent=/usr/bin/getent
75rpc_pipefs=/var/lib/nfs/rpc_pipefs
76#
77die()
78{
79 echo "Usage: $0 cache_name entry_name"
80 exit 1
81}
82
83[ $# -lt 2 ] && die
84cachename="$1"
85cache_path=${rpc_pipefs}/cache/${cachename}/channel
86
87case "${cachename}" in
88 dns_resolve)
89 name="$2"
90 result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
91 [ -z "${result}" ] && result="0"
92 ;;
93 *)
94 die
95 ;;
96esac
97echo "${result} ${name} ${ttl}" >${cache_path}
98
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
index 05d81cbcb2e1..5920fe26e6ff 100644
--- a/Documentation/filesystems/nfs41-server.txt
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file. Note that to write this
11control file, the nfsd service must be taken down. Use your user-mode 11control file, the nfsd service must be taken down. Use your user-mode
12nfs-utils to set this up; see rpc.nfsd(8) 12nfs-utils to set this up; see rpc.nfsd(8)
13 13
14(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
15"-4", respectively. Therefore, code meant to work on both new and old
16kernels must turn 4.1 on or off *before* turning support for version 4
17on or off; rpc.nfsd does this correctly.)
18
14The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based 19The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
15on the latest NFSv4.1 Internet Draft: 20on the latest NFSv4.1 Internet Draft:
16http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 21http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
@@ -25,6 +30,49 @@ are still under development out of tree.
25See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design 30See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
26for more information. 31for more information.
27 32
33The current implementation is intended for developers only: while it
34does support ordinary file operations on clients we have tested against
35(including the linux client), it is incomplete in ways which may limit
36features unexpectedly, cause known bugs in rare cases, or cause
37interoperability problems with future clients. Known issues:
38
39 - gss support is questionable: currently mounts with kerberos
40 from a linux client are possible, but we aren't really
41 conformant with the spec (for example, we don't use kerberos
42 on the backchannel correctly).
43 - no trunking support: no clients currently take advantage of
44 trunking, but this is a mandatory failure, and its use is
45 recommended to clients in a number of places. (E.g. to ensure
46 timely renewal in case an existing connection's retry timeouts
47 have gotten too long; see section 8.3 of the draft.)
48 Therefore, lack of this feature may cause future clients to
49 fail.
50 - Incomplete backchannel support: incomplete backchannel gss
51 support and no support for BACKCHANNEL_CTL mean that
52 callbacks (hence delegations and layouts) may not be
53 available and clients confused by the incomplete
54 implementation may fail.
55 - Server reboot recovery is unsupported; if the server reboots,
56 clients may fail.
57 - We do not support SSV, which provides security for shared
58 client-server state (thus preventing unauthorized tampering
59 with locks and opens, for example). It is mandatory for
60 servers to support this, though no clients use it yet.
61 - Mandatory operations which we do not support, such as
62 DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
63 TEST_STATEID, are not currently used by clients, but will be
64 (and the spec recommends their uses in common cases), and
65 clients should not be expected to know how to recover from the
66 case where they are not supported. This will eventually cause
67 interoperability failures.
68
69In addition, some limitations are inherited from the current NFSv4
70implementation:
71
72 - Incomplete delegation enforcement: if a file is renamed or
73 unlinked, a client holding a delegation may continue to
74 indefinitely allow opens of the file under the old name.
75
28The table below, taken from the NFSv4.1 document, lists 76The table below, taken from the NFSv4.1 document, lists
29the operations that are mandatory to implement (REQ), optional 77the operations that are mandatory to implement (REQ), optional
30(OPT), and NFSv4.0 operations that are required not to implement (MNI) 78(OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
142 190
143Implementation notes: 191Implementation notes:
144 192
193DELEGPURGE:
194* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
195 CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
196 persist across client reboots). Thus we need not implement this for
197 now.
198
145EXCHANGE_ID: 199EXCHANGE_ID:
146* only SP4_NONE state protection supported 200* only SP4_NONE state protection supported
147* implementation ids are ignored 201* implementation ids are ignored
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
index 68baddf3c3e0..3ba0b945aaf8 100644
--- a/Documentation/filesystems/nfsroot.txt
+++ b/Documentation/filesystems/nfsroot.txt
@@ -105,7 +105,7 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
105 the client address and this parameter is NOT empty only 105 the client address and this parameter is NOT empty only
106 replies from the specified server are accepted. 106 replies from the specified server are accepted.
107 107
108 Only required for for NFS root. That is autoconfiguration 108 Only required for NFS root. That is autoconfiguration
109 will not be triggered if it is missing and NFS root is not 109 will not be triggered if it is missing and NFS root is not
110 in operation. 110 in operation.
111 111
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ffead13f9443..2c48f945546b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -176,6 +176,7 @@ read the file /proc/PID/status:
176 CapBnd: ffffffffffffffff 176 CapBnd: ffffffffffffffff
177 voluntary_ctxt_switches: 0 177 voluntary_ctxt_switches: 0
178 nonvoluntary_ctxt_switches: 1 178 nonvoluntary_ctxt_switches: 1
179 Stack usage: 12 kB
179 180
180This shows you nearly the same information you would get if you viewed it with 181This shows you nearly the same information you would get if you viewed it with
181the ps command. In fact, ps uses the proc file system to obtain its 182the ps command. In fact, ps uses the proc file system to obtain its
@@ -229,6 +230,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
229 Mems_allowed_list Same as previous, but in "list format" 230 Mems_allowed_list Same as previous, but in "list format"
230 voluntary_ctxt_switches number of voluntary context switches 231 voluntary_ctxt_switches number of voluntary context switches
231 nonvoluntary_ctxt_switches number of non voluntary context switches 232 nonvoluntary_ctxt_switches number of non voluntary context switches
233 Stack usage: stack usage high water mark (round up to page size)
232.............................................................................. 234..............................................................................
233 235
234Table 1-3: Contents of the statm files (as of 2.6.8-rc3) 236Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
@@ -307,7 +309,7 @@ address perms offset dev inode pathname
30708049000-0804a000 rw-p 00001000 03:00 8312 /opt/test 30908049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
3080804a000-0806b000 rw-p 00000000 00:00 0 [heap] 3100804a000-0806b000 rw-p 00000000 00:00 0 [heap]
309a7cb1000-a7cb2000 ---p 00000000 00:00 0 311a7cb1000-a7cb2000 ---p 00000000 00:00 0
310a7cb2000-a7eb2000 rw-p 00000000 00:00 0 312a7cb2000-a7eb2000 rw-p 00000000 00:00 0 [threadstack:001ff4b4]
311a7eb2000-a7eb3000 ---p 00000000 00:00 0 313a7eb2000-a7eb3000 ---p 00000000 00:00 0
312a7eb3000-a7ed5000 rw-p 00000000 00:00 0 314a7eb3000-a7ed5000 rw-p 00000000 00:00 0
313a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 315a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
@@ -343,6 +345,7 @@ is not associated with a file:
343 [stack] = the stack of the main process 345 [stack] = the stack of the main process
344 [vdso] = the "virtual dynamic shared object", 346 [vdso] = the "virtual dynamic shared object",
345 the kernel system call handler 347 the kernel system call handler
348 [threadstack:xxxxxxxx] = the stack of the thread, xxxxxxxx is the stack size
346 349
347 or if empty, the mapping is anonymous. 350 or if empty, the mapping is anonymous.
348 351
@@ -375,6 +378,19 @@ of memory currently marked as referenced or accessed.
375This file is only present if the CONFIG_MMU kernel configuration option is 378This file is only present if the CONFIG_MMU kernel configuration option is
376enabled. 379enabled.
377 380
381The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
382bits on both physical and virtual pages associated with a process.
383To clear the bits for all the pages associated with the process
384 > echo 1 > /proc/PID/clear_refs
385
386To clear the bits for the anonymous pages associated with the process
387 > echo 2 > /proc/PID/clear_refs
388
389To clear the bits for the file mapped pages associated with the process
390 > echo 3 > /proc/PID/clear_refs
391Any other value written to /proc/PID/clear_refs will have no effect.
392
393
3781.2 Kernel data 3941.2 Kernel data
379--------------- 395---------------
380 396
@@ -1032,9 +1048,9 @@ Various pieces of information about kernel activity are available in the
1032since the system first booted. For a quick look, simply cat the file: 1048since the system first booted. For a quick look, simply cat the file:
1033 1049
1034 > cat /proc/stat 1050 > cat /proc/stat
1035 cpu 2255 34 2290 22625563 6290 127 456 0 1051 cpu 2255 34 2290 22625563 6290 127 456 0 0
1036 cpu0 1132 34 1441 11311718 3675 127 438 0 1052 cpu0 1132 34 1441 11311718 3675 127 438 0 0
1037 cpu1 1123 0 849 11313845 2614 0 18 0 1053 cpu1 1123 0 849 11313845 2614 0 18 0 0
1038 intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...] 1054 intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
1039 ctxt 1990473 1055 ctxt 1990473
1040 btime 1062191376 1056 btime 1062191376
@@ -1056,6 +1072,7 @@ second). The meanings of the columns are as follows, from left to right:
1056- irq: servicing interrupts 1072- irq: servicing interrupts
1057- softirq: servicing softirqs 1073- softirq: servicing softirqs
1058- steal: involuntary wait 1074- steal: involuntary wait
1075- guest: running a guest
1059 1076
1060The "intr" line gives counts of interrupts serviced since boot time, for each 1077The "intr" line gives counts of interrupts serviced since boot time, for each
1061of the possible system interrupts. The first column is the total of all 1078of the possible system interrupts. The first column is the total of all
@@ -1096,7 +1113,6 @@ Table 1-12: Files in /proc/fs/ext4/<devname>
1096.............................................................................. 1113..............................................................................
1097 File Content 1114 File Content
1098 mb_groups details of multiblock allocator buddy cache of free blocks 1115 mb_groups details of multiblock allocator buddy cache of free blocks
1099 mb_history multiblock allocation history
1100.............................................................................. 1116..............................................................................
1101 1117
1102 1118
@@ -1191,7 +1207,7 @@ The following heuristics are then applied:
1191 * if the task was reniced, its score doubles 1207 * if the task was reniced, its score doubles
1192 * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE 1208 * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
1193 or CAP_SYS_RAWIO) have their score divided by 4 1209 or CAP_SYS_RAWIO) have their score divided by 4
1194 * if oom condition happened in one cpuset and checked task does not belong 1210 * if oom condition happened in one cpuset and checked process does not belong
1195 to it, its score is divided by 8 1211 to it, its score is divided by 8
1196 * the resulting score is multiplied by two to the power of oom_adj, i.e. 1212 * the resulting score is multiplied by two to the power of oom_adj, i.e.
1197 points <<= oom_adj when it is positive and 1213 points <<= oom_adj when it is positive and
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt
index b843743aa0b5..0d15ebccf5b0 100644
--- a/Documentation/filesystems/seq_file.txt
+++ b/Documentation/filesystems/seq_file.txt
@@ -46,7 +46,7 @@ better to do. The file is seekable, in that one can do something like the
46following: 46following:
47 47
48 dd if=/proc/sequence of=out1 count=1 48 dd if=/proc/sequence of=out1 count=1
49 dd if=/proc/sequence skip=1 out=out2 count=1 49 dd if=/proc/sequence skip=1 of=out2 count=1
50 50
51Then concatenate the output files out1 and out2 and get the right 51Then concatenate the output files out1 and out2 and get the right
52result. Yes, it is a thoroughly useless module, but the point is to show 52result. Yes, it is a thoroughly useless module, but the point is to show
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 736540045dc7..23a181074f94 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -4,7 +4,7 @@ Shared Subtrees
4Contents: 4Contents:
5 1) Overview 5 1) Overview
6 2) Features 6 2) Features
7 3) smount command 7 3) Setting mount states
8 4) Use-case 8 4) Use-case
9 5) Detailed semantics 9 5) Detailed semantics
10 6) Quiz 10 6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
41 41
42 Here is an example: 42 Here is an example:
43 43
44 Lets say /mnt has a mount that is shared. 44 Let's say /mnt has a mount that is shared.
45 mount --make-shared /mnt 45 mount --make-shared /mnt
46 46
47 note: mount command does not yet support the --make-shared flag. 47 Note: mount(8) command now supports the --make-shared flag,
48 I have included a small C program which does the same by executing 48 so the sample 'smount' program is no longer needed and has been
49 'smount /mnt shared' 49 removed.
50 50
51 #mount --bind /mnt /tmp 51 # mount --bind /mnt /tmp
52 The above command replicates the mount at /mnt to the mountpoint /tmp 52 The above command replicates the mount at /mnt to the mountpoint /tmp
53 and the contents of both the mounts remain identical. 53 and the contents of both the mounts remain identical.
54 54
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
58 #ls /tmp 58 #ls /tmp
59 a b c 59 a b c
60 60
61 Now lets say we mount a device at /tmp/a 61 Now let's say we mount a device at /tmp/a
62 #mount /dev/sd0 /tmp/a 62 # mount /dev/sd0 /tmp/a
63 63
64 #ls /tmp/a 64 #ls /tmp/a
65 t1 t2 t2 65 t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
80 80
81 Here is an example: 81 Here is an example:
82 82
83 Lets say /mnt has a mount which is shared. 83 Let's say /mnt has a mount which is shared.
84 #mount --make-shared /mnt 84 # mount --make-shared /mnt
85 85
86 Lets bind mount /mnt to /tmp 86 Let's bind mount /mnt to /tmp
87 #mount --bind /mnt /tmp 87 # mount --bind /mnt /tmp
88 88
89 the new mount at /tmp becomes a shared mount and it is a replica of 89 the new mount at /tmp becomes a shared mount and it is a replica of
90 the mount at /mnt. 90 the mount at /mnt.
91 91
92 Now lets make the mount at /tmp; a slave of /mnt 92 Now let's make the mount at /tmp; a slave of /mnt
93 #mount --make-slave /tmp 93 # mount --make-slave /tmp
94 [or smount /tmp slave]
95 94
96 lets mount /dev/sd0 on /mnt/a 95 let's mount /dev/sd0 on /mnt/a
97 #mount /dev/sd0 /mnt/a 96 # mount /dev/sd0 /mnt/a
98 97
99 #ls /mnt/a 98 #ls /mnt/a
100 t1 t2 t3 99 t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
104 103
105 Note the mount event has propagated to the mount at /tmp 104 Note the mount event has propagated to the mount at /tmp
106 105
107 However lets see what happens if we mount something on the mount at /tmp 106 However let's see what happens if we mount something on the mount at /tmp
108 107
109 #mount /dev/sd1 /tmp/b 108 # mount /dev/sd1 /tmp/b
110 109
111 #ls /tmp/b 110 #ls /tmp/b
112 s1 s2 s3 111 s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
124 123
1252d) A unbindable mount is a unbindable private mount 1242d) A unbindable mount is a unbindable private mount
126 125
127 lets say we have a mount at /mnt and we make is unbindable 126 let's say we have a mount at /mnt and we make is unbindable
128 127
129 #mount --make-unbindable /mnt 128 # mount --make-unbindable /mnt
130 [ smount /mnt unbindable ]
131 129
132 Lets try to bind mount this mount somewhere else. 130 Let's try to bind mount this mount somewhere else.
133 # mount --bind /mnt /tmp 131 # mount --bind /mnt /tmp
134 mount: wrong fs type, bad option, bad superblock on /mnt, 132 mount: wrong fs type, bad option, bad superblock on /mnt,
135 or too many mounted file systems 133 or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
137 Binding a unbindable mount is a invalid operation. 135 Binding a unbindable mount is a invalid operation.
138 136
139 137
1403) smount command 1383) Setting mount states
141 139
142 Currently the mount command is not aware of shared subtree features. 140 The mount command (util-linux package) can be used to set mount
143 Work is in progress to add the support in mount ( util-linux package ). 141 states:
144 Till then use the following program.
145 142
146 ------------------------------------------------------------------------ 143 mount --make-shared mountpoint
147 // 144 mount --make-slave mountpoint
148 //this code was developed my Miklos Szeredi <miklos@szeredi.hu> 145 mount --make-private mountpoint
149 //and modified by Ram Pai <linuxram@us.ibm.com> 146 mount --make-unbindable mountpoint
150 // sample usage:
151 // smount /tmp shared
152 //
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <unistd.h>
156 #include <string.h>
157 #include <sys/mount.h>
158 #include <sys/fsuid.h>
159
160 #ifndef MS_REC
161 #define MS_REC 0x4000 /* 16384: Recursive loopback */
162 #endif
163
164 #ifndef MS_SHARED
165 #define MS_SHARED 1<<20 /* Shared */
166 #endif
167
168 #ifndef MS_PRIVATE
169 #define MS_PRIVATE 1<<18 /* Private */
170 #endif
171
172 #ifndef MS_SLAVE
173 #define MS_SLAVE 1<<19 /* Slave */
174 #endif
175
176 #ifndef MS_UNBINDABLE
177 #define MS_UNBINDABLE 1<<17 /* Unbindable */
178 #endif
179
180 int main(int argc, char *argv[])
181 {
182 int type;
183 if(argc != 3) {
184 fprintf(stderr, "usage: %s dir "
185 "<rshared|rslave|rprivate|runbindable|shared|slave"
186 "|private|unbindable>\n" , argv[0]);
187 return 1;
188 }
189
190 fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
191
192 if (strcmp(argv[2],"rshared")==0)
193 type=(MS_SHARED|MS_REC);
194 else if (strcmp(argv[2],"rslave")==0)
195 type=(MS_SLAVE|MS_REC);
196 else if (strcmp(argv[2],"rprivate")==0)
197 type=(MS_PRIVATE|MS_REC);
198 else if (strcmp(argv[2],"runbindable")==0)
199 type=(MS_UNBINDABLE|MS_REC);
200 else if (strcmp(argv[2],"shared")==0)
201 type=MS_SHARED;
202 else if (strcmp(argv[2],"slave")==0)
203 type=MS_SLAVE;
204 else if (strcmp(argv[2],"private")==0)
205 type=MS_PRIVATE;
206 else if (strcmp(argv[2],"unbindable")==0)
207 type=MS_UNBINDABLE;
208 else {
209 fprintf(stderr, "invalid operation: %s\n", argv[2]);
210 return 1;
211 }
212 setfsuid(getuid());
213
214 if(mount("", argv[1], "dontcare", type, "") == -1) {
215 perror("mount");
216 return 1;
217 }
218 return 0;
219 }
220 -----------------------------------------------------------------------
221
222 Copy the above code snippet into smount.c
223 gcc -o smount smount.c
224
225
226 (i) To mark all the mounts under /mnt as shared execute the following
227 command:
228
229 smount /mnt rshared
230 the corresponding syntax planned for mount command is
231 mount --make-rshared /mnt
232
233 just to mark a mount /mnt as shared, execute the following
234 command:
235 smount /mnt shared
236 the corresponding syntax planned for mount command is
237 mount --make-shared /mnt
238
239 (ii) To mark all the shared mounts under /mnt as slave execute the
240 following
241
242 command:
243 smount /mnt rslave
244 the corresponding syntax planned for mount command is
245 mount --make-rslave /mnt
246
247 just to mark a mount /mnt as slave, execute the following
248 command:
249 smount /mnt slave
250 the corresponding syntax planned for mount command is
251 mount --make-slave /mnt
252
253 (iii) To mark all the mounts under /mnt as private execute the
254 following command:
255
256 smount /mnt rprivate
257 the corresponding syntax planned for mount command is
258 mount --make-rprivate /mnt
259
260 just to mark a mount /mnt as private, execute the following
261 command:
262 smount /mnt private
263 the corresponding syntax planned for mount command is
264 mount --make-private /mnt
265
266 NOTE: by default all the mounts are created as private. But if
267 you want to change some shared/slave/unbindable mount as
268 private at a later point in time, this command can help.
269
270 (iv) To mark all the mounts under /mnt as unbindable execute the
271 following
272
273 command:
274 smount /mnt runbindable
275 the corresponding syntax planned for mount command is
276 mount --make-runbindable /mnt
277
278 just to mark a mount /mnt as unbindable, execute the following
279 command:
280 smount /mnt unbindable
281 the corresponding syntax planned for mount command is
282 mount --make-unbindable /mnt
283 147
284 148
2854) Use cases 1494) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
350 mount --rbind / /view/v3 214 mount --rbind / /view/v3
351 mount --rbind / /view/v4 215 mount --rbind / /view/v4
352 216
353 and if /usr has a versioning filesystem mounted, than that 217 and if /usr has a versioning filesystem mounted, then that
354 mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and 218 mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
355 /view/v4/usr too 219 /view/v4/usr too
356 220
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
390 254
391 For example: 255 For example:
392 mount --make-shared /mnt 256 mount --make-shared /mnt
393 mount --bin /mnt /tmp 257 mount --bind /mnt /tmp
394 258
395 The mount at /mnt and that at /tmp are both shared and belong 259 The mount at /mnt and that at /tmp are both shared and belong
396 to the same peer group. Anything mounted or unmounted under 260 to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
558 then the subtree under the unbindable mount is pruned in the new 422 then the subtree under the unbindable mount is pruned in the new
559 location. 423 location.
560 424
561 eg: lets say we have the following mount tree. 425 eg: let's say we have the following mount tree.
562 426
563 A 427 A
564 / \ 428 / \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
566 / \ / \ 430 / \ / \
567 D E F G 431 D E F G
568 432
569 Lets say all the mount except the mount C in the tree are 433 Let's say all the mount except the mount C in the tree are
570 of a type other than unbindable. 434 of a type other than unbindable.
571 435
572 If this tree is rbound to say Z 436 If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
683 'b' on mounts that receive propagation from mount 'B' and does not have 547 'b' on mounts that receive propagation from mount 'B' and does not have
684 sub-mounts within them are unmounted. 548 sub-mounts within them are unmounted.
685 549
686 Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to 550 Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
687 each other. 551 each other.
688 552
689 lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount 553 let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
690 'B1', 'B2' and 'B3' respectively. 554 'B1', 'B2' and 'B3' respectively.
691 555
692 lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on 556 let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
693 mount 'B1', 'B2' and 'B3' respectively. 557 mount 'B1', 'B2' and 'B3' respectively.
694 558
695 if 'C1' is unmounted, all the mounts that are most-recently-mounted on 559 if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
710 A cloned namespace contains all the mounts as that of the parent 574 A cloned namespace contains all the mounts as that of the parent
711 namespace. 575 namespace.
712 576
713 Lets say 'A' and 'B' are the corresponding mounts in the parent and the 577 Let's say 'A' and 'B' are the corresponding mounts in the parent and the
714 child namespace. 578 child namespace.
715 579
716 If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to 580 If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
759 mount --make-slave /mnt 623 mount --make-slave /mnt
760 624
761 At this point we have the first mount at /tmp and 625 At this point we have the first mount at /tmp and
762 its root dentry is 1. Lets call this mount 'A' 626 its root dentry is 1. Let's call this mount 'A'
763 And then we have a second mount at /tmp1 with root 627 And then we have a second mount at /tmp1 with root
764 dentry 2. Lets call this mount 'B' 628 dentry 2. Let's call this mount 'B'
765 Next we have a third mount at /mnt with root dentry 629 Next we have a third mount at /mnt with root dentry
766 mnt. Lets call this mount 'C' 630 mnt. Let's call this mount 'C'
767 631
768 'B' is the slave of 'A' and 'C' is a slave of 'B' 632 'B' is the slave of 'A' and 'C' is a slave of 'B'
769 A -> B -> C 633 A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
794 658
795 Q3 Why is unbindable mount needed? 659 Q3 Why is unbindable mount needed?
796 660
797 Lets say we want to replicate the mount tree at multiple 661 Let's say we want to replicate the mount tree at multiple
798 locations within the same subtree. 662 locations within the same subtree.
799 663
800 if one rbind mounts a tree within the same subtree 'n' times 664 if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
803 mounts. Here is a example. 667 mounts. Here is a example.
804 668
805 step 1: 669 step 1:
806 lets say the root tree has just two directories with 670 let's say the root tree has just two directories with
807 one vfsmount. 671 one vfsmount.
808 root 672 root
809 / \ 673 / \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
875 Unclonable mounts come in handy here. 739 Unclonable mounts come in handy here.
876 740
877 step 1: 741 step 1:
878 lets say the root tree has just two directories with 742 let's say the root tree has just two directories with
879 one vfsmount. 743 one vfsmount.
880 root 744 root
881 / \ 745 / \
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index b58b84b50fa2..eed520fd0c8e 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -102,7 +102,7 @@ shortname=lower|win95|winnt|mixed
102 winnt: emulate the Windows NT rule for display/create. 102 winnt: emulate the Windows NT rule for display/create.
103 mixed: emulate the Windows NT rule for display, 103 mixed: emulate the Windows NT rule for display,
104 emulate the Windows 95 rule for create. 104 emulate the Windows 95 rule for create.
105 Default setting is `lower'. 105 Default setting is `mixed'.
106 106
107tz=UTC -- Interpret timestamps as UTC rather than local time. 107tz=UTC -- Interpret timestamps as UTC rather than local time.
108 This option disables the conversion of timestamps 108 This option disables the conversion of timestamps
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f49eecf2e573..623f094c9d8d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -536,6 +536,7 @@ struct address_space_operations {
536 /* migrate the contents of a page to the specified target */ 536 /* migrate the contents of a page to the specified target */
537 int (*migratepage) (struct page *, struct page *); 537 int (*migratepage) (struct page *, struct page *);
538 int (*launder_page) (struct page *); 538 int (*launder_page) (struct page *);
539 int (*error_remove_page) (struct mapping *mapping, struct page *page);
539}; 540};
540 541
541 writepage: called by the VM to write a dirty page to backing store. 542 writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
694 prevent redirtying the page, it is kept locked during the whole 695 prevent redirtying the page, it is kept locked during the whole
695 operation. 696 operation.
696 697
698 error_remove_page: normally set to generic_error_remove_page if truncation
699 is ok for this address space. Used for memory failure handling.
700 Setting this implies you deal with pages going away under you,
701 unless you have them locked or reference counts increased.
702
703
697The File Object 704The File Object
698=============== 705===============
699 706