aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2016-06-30 23:34:49 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2016-06-30 23:34:49 -0400
commitb223f4e215b32849b841e750e83a915b670070f5 (patch)
tree75340f6305028de331a17255018869822b3886d2 /Documentation/filesystems
parentf4e6d844bdc142322905d137a9e44e07eee43c5c (diff)
parent0cac643c102c0632dc2cc81e2490b0fec1cac0af (diff)
Merge branch 'd_real' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs into work.misc
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking5
-rw-r--r--Documentation/filesystems/devpts.txt145
-rw-r--r--Documentation/filesystems/vfs.txt40
3 files changed, 44 insertions, 146 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 08086dc160d3..a38da93865c2 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -20,6 +20,8 @@ prototypes:
20 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); 20 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
21 struct vfsmount *(*d_automount)(struct path *path); 21 struct vfsmount *(*d_automount)(struct path *path);
22 int (*d_manage)(struct dentry *, bool); 22 int (*d_manage)(struct dentry *, bool);
23 struct dentry *(*d_real)(struct dentry *, const struct inode *,
24 unsigned int);
23 25
24locking rules: 26locking rules:
25 rename_lock ->d_lock may block rcu-walk 27 rename_lock ->d_lock may block rcu-walk
@@ -34,6 +36,7 @@ d_iput: no no yes no
34d_dname: no no no no 36d_dname: no no no no
35d_automount: no no yes no 37d_automount: no no yes no
36d_manage: no no yes (ref-walk) maybe 38d_manage: no no yes (ref-walk) maybe
39d_real: no no yes no
37 40
38--------------------------- inode_operations --------------------------- 41--------------------------- inode_operations ---------------------------
39prototypes: 42prototypes:
@@ -66,7 +69,6 @@ prototypes:
66 struct file *, unsigned open_flag, 69 struct file *, unsigned open_flag,
67 umode_t create_mode, int *opened); 70 umode_t create_mode, int *opened);
68 int (*tmpfile) (struct inode *, struct dentry *, umode_t); 71 int (*tmpfile) (struct inode *, struct dentry *, umode_t);
69 int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
70 72
71locking rules: 73locking rules:
72 all may block 74 all may block
@@ -95,7 +97,6 @@ fiemap: no
95update_time: no 97update_time: no
96atomic_open: yes 98atomic_open: yes
97tmpfile: no 99tmpfile: no
98dentry_open: no
99 100
100 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on 101 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
101victim. 102victim.
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
index 30d2fcb32f72..9f94fe276dea 100644
--- a/Documentation/filesystems/devpts.txt
+++ b/Documentation/filesystems/devpts.txt
@@ -1,141 +1,26 @@
1Each mount of the devpts filesystem is now distinct such that ptys
2and their indices allocated in one mount are independent from ptys
3and their indices in all other mounts.
1 4
2To support containers, we now allow multiple instances of devpts filesystem, 5All mounts of the devpts filesystem now create a /dev/pts/ptmx node
3such that indices of ptys allocated in one instance are independent of indices 6with permissions 0000.
4allocated in other instances of devpts.
5 7
6To preserve backward compatibility, this support for multiple instances is 8To retain backwards compatibility a ptmx device node (aka any node
7enabled only if: 9created with "mknod name c 5 2") when opened will look for an instance
10of devpts under the name "pts" in the same directory as the ptmx device
11node.
8 12
9 - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and 13As an option instead of placing a /dev/ptmx device node at /dev/ptmx
10 - '-o newinstance' mount option is specified while mounting devpts 14it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or
11 15to bind mount /dev/pts/ptmx to /dev/ptmx. If you opt for using
12IOW, devpts now supports both single-instance and multi-instance semantics. 16the devpts filesystem in this manner devpts should be mounted with
13 17the ptmxmode=0666 option, or chmod 0666 /dev/pts/ptmx should be called.
14If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
15this referred to as the "legacy" mode. In this mode, the new mount options
16(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
17on console.
18
19If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
20'newinstance' option (as in current start-up scripts) the new mount binds
21to the initial kernel mount of devpts. This mode is referred to as the
22'single-instance' mode and the current, single-instance semantics are
23preserved, i.e PTYs are common across the system.
24
25The only difference between this single-instance mode and the legacy mode
26is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which
27can safely be ignored.
28
29If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
30the mount is considered to be in the multi-instance mode and a new instance
31of the devpts fs is created. Any ptys created in this instance are independent
32of ptys in other instances of devpts. Like in the single-instance mode, the
33/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
34open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or
35bind-mount.
36
37Eg: A container startup script could do the following:
38
39 $ chmod 0666 /dev/pts/ptmx
40 $ rm /dev/ptmx
41 $ ln -s pts/ptmx /dev/ptmx
42 $ ns_exec -cm /bin/bash
43
44 # We are now in new container
45
46 $ umount /dev/pts
47 $ mount -t devpts -o newinstance lxcpts /dev/pts
48 $ sshd -p 1234
49
50where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
51/bin/bash in the child process. A pty created by the sshd is not visible in
52the original mount of /dev/pts.
53 18
54Total count of pty pairs in all instances is limited by sysctls: 19Total count of pty pairs in all instances is limited by sysctls:
55kernel.pty.max = 4096 - global limit 20kernel.pty.max = 4096 - global limit
56kernel.pty.reserve = 1024 - reserve for initial instance 21kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace
57kernel.pty.nr - current count of ptys 22kernel.pty.nr - current count of ptys
58 23
59Per-instance limit could be set by adding mount option "max=<count>". 24Per-instance limit could be set by adding mount option "max=<count>".
60This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve. 25This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve.
61In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit. 26In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit.
62
63User-space changes
64------------------
65
66In multi-instance mode (i.e '-o newinstance' mount option is specified at least
67once), following user-space issues should be noted.
68
691. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
70 and no change is needed to system-startup scripts.
71
722. To effectively use multi-instance mode (i.e -o newinstance is specified)
73 administrators or startup scripts should "redirect" open of /dev/ptmx to
74 /dev/pts/ptmx using either a bind mount or symlink.
75
76 $ mount -t devpts -o newinstance devpts /dev/pts
77
78 followed by either
79
80 $ rm /dev/ptmx
81 $ ln -s pts/ptmx /dev/ptmx
82 $ chmod 666 /dev/pts/ptmx
83 or
84 $ mount -o bind /dev/pts/ptmx /dev/ptmx
85
863. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
87 enables better error-reporting and treats both single-instance and
88 multi-instance mounts similarly.
89
90 But this method requires that system-startup scripts set the mode of
91 /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
92 mode by, either
93
94 - adding ptmxmode mount option to devpts entry in /etc/fstab, or
95 - using 'chmod 0666 /dev/pts/ptmx'
96
974. If multi-instance mode mount is needed for containers, but the system
98 startup scripts have not yet been updated, container-startup scripts
99 should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
100 instance mounts.
101
102 Or, in general, container-startup scripts should use:
103
104 mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
105 if [ ! -L /dev/ptmx ]; then
106 mount -o bind /dev/pts/ptmx /dev/ptmx
107 fi
108
109 When all devpts mounts are multi-instance, /dev/ptmx can permanently be
110 a symlink to pts/ptmx and the bind mount can be ignored.
111
1125. A multi-instance mount that is not accompanied by the /dev/ptmx to
113 /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
114
115 mount -t devpts -o newinstance lxcpts /dev/pts
116
117 immediately followed by:
118
119 open("/dev/ptmx")
120
121 would create a pty, say /dev/pts/7, in the initial kernel mount.
122 But /dev/pts/7 would be invisible in the new mount.
123
1246. The permissions for /dev/pts/ptmx node should be specified when mounting
125 /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
126
127 mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
128
129 The permissions can be later be changed as usual with 'chmod'.
130
131 chmod 666 /dev/pts/ptmx
132
1337. A mount of devpts without the 'newinstance' option results in binding to
134 initial kernel mount. This behavior while preserving legacy semantics,
135 does not provide strict isolation in a container environment. i.e by
136 mounting devpts without the 'newinstance' option, a container could
137 get visibility into the 'host' or root container's devpts.
138
139 To workaround this and have strict isolation, all mounts of devpts,
140 including the mount in the root container, should use the newinstance
141 option.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index d4e07c00e18e..70a056fe51a3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,7 +364,6 @@ struct inode_operations {
364 int (*atomic_open)(struct inode *, struct dentry *, struct file *, 364 int (*atomic_open)(struct inode *, struct dentry *, struct file *,
365 unsigned open_flag, umode_t create_mode, int *opened); 365 unsigned open_flag, umode_t create_mode, int *opened);
366 int (*tmpfile) (struct inode *, struct dentry *, umode_t); 366 int (*tmpfile) (struct inode *, struct dentry *, umode_t);
367 int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
368}; 367};
369 368
370Again, all methods are called without any locks being held, unless 369Again, all methods are called without any locks being held, unless
@@ -694,13 +693,6 @@ struct address_space_operations {
694 but instead uses bmap to find out where the blocks in the file 693 but instead uses bmap to find out where the blocks in the file
695 are and uses those addresses directly. 694 are and uses those addresses directly.
696 695
697 dentry_open: *WARNING: probably going away soon, do not use!* This is an
698 alternative to f_op->open(), the difference is that this method may open
699 a file not necessarily originating from the same filesystem as the one
700 i_op->open() was called on. It may be useful for stacking filesystems
701 which want to allow native I/O directly on underlying files.
702
703
704 invalidatepage: If a page has PagePrivate set, then invalidatepage 696 invalidatepage: If a page has PagePrivate set, then invalidatepage
705 will be called when part or all of the page is to be removed 697 will be called when part or all of the page is to be removed
706 from the address space. This generally corresponds to either a 698 from the address space. This generally corresponds to either a
@@ -936,6 +928,8 @@ struct dentry_operations {
936 char *(*d_dname)(struct dentry *, char *, int); 928 char *(*d_dname)(struct dentry *, char *, int);
937 struct vfsmount *(*d_automount)(struct path *); 929 struct vfsmount *(*d_automount)(struct path *);
938 int (*d_manage)(struct dentry *, bool); 930 int (*d_manage)(struct dentry *, bool);
931 struct dentry *(*d_real)(struct dentry *, const struct inode *,
932 unsigned int);
939}; 933};
940 934
941 d_revalidate: called when the VFS needs to revalidate a dentry. This 935 d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -1020,6 +1014,14 @@ struct dentry_operations {
1020 at the end of the buffer, and returns a pointer to the first char. 1014 at the end of the buffer, and returns a pointer to the first char.
1021 dynamic_dname() helper function is provided to take care of this. 1015 dynamic_dname() helper function is provided to take care of this.
1022 1016
1017 Example :
1018
1019 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
1020 {
1021 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
1022 dentry->d_inode->i_ino);
1023 }
1024
1023 d_automount: called when an automount dentry is to be traversed (optional). 1025 d_automount: called when an automount dentry is to be traversed (optional).
1024 This should create a new VFS mount record and return the record to the 1026 This should create a new VFS mount record and return the record to the
1025 caller. The caller is supplied with a path parameter giving the 1027 caller. The caller is supplied with a path parameter giving the
@@ -1058,13 +1060,23 @@ struct dentry_operations {
1058 This function is only used if DCACHE_MANAGE_TRANSIT is set on the 1060 This function is only used if DCACHE_MANAGE_TRANSIT is set on the
1059 dentry being transited from. 1061 dentry being transited from.
1060 1062
1061Example : 1063 d_real: overlay/union type filesystems implement this method to return one of
1064 the underlying dentries hidden by the overlay. It is used in three
1065 different modes:
1062 1066
1063static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) 1067 Called from open it may need to copy-up the file depending on the
1064{ 1068 supplied open flags. This mode is selected with a non-zero flags
1065 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", 1069 argument. In this mode the d_real method can return an error.
1066 dentry->d_inode->i_ino); 1070
1067} 1071 Called from file_dentry() it returns the real dentry matching the inode
1072 argument. The real dentry may be from a lower layer already copied up,
1073 but still referenced from the file. This mode is selected with a
1074 non-NULL inode argument. This will always succeed.
1075
1076 With NULL inode and zero flags the topmost real underlying dentry is
1077 returned. This will always succeed.
1078
1079 This method is never called with both non-NULL inode and non-zero flags.
1068 1080
1069Each dentry has a pointer to its parent dentry, as well as a hash list 1081Each dentry has a pointer to its parent dentry, as well as a hash list
1070of child dentries. Child dentries are basically like files in a 1082of child dentries. Child dentries are basically like files in a