aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2016-06-02 11:29:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-06-05 13:36:01 -0400
commiteedf265aa003b4781de24cfed40a655a664457e6 (patch)
tree0e37f0a0c6fd15f7528aa3d3bfaec5685f083282
parent049ec1b5a76d34a6980cccdb7c0baeb4eed7a993 (diff)
devpts: Make each mount of devpts an independent filesystem.
The /dev/ptmx device node is changed to look up the directory entry "pts" in the same directory as the /dev/ptmx device node was opened in. If there is a "pts" entry and that entry is a devpts filesystem /dev/ptmx uses that filesystem. Otherwise the open of /dev/ptmx fails. The DEVPTS_MULTIPLE_INSTANCES configuration option is removed, so that userspace can now safely depend on each mount of devpts creating a new instance of the filesystem. Each mount of devpts is now a separate and equal filesystem. Reserved ttys are now available to all instances of devpts where the mounter is in the initial mount namespace. A new vfs helper path_pts is introduced that finds a directory entry named "pts" in the directory of the passed in path, and changes the passed in path to point to it. The helper path_pts uses a function path_parent_directory that was factored out of follow_dotdot. In the implementation of devpts: - devpts_mnt is killed as it is no longer meaningful if all mounts of devpts are equal. - pts_sb_from_inode is replaced by just inode->i_sb as all cached inodes in the tty layer are now from the devpts filesystem. - devpts_get_ref is rolled into the new function devpts_acquire. And the unnecessary inode hold is removed. - devpts_put_ref is renamed devpts_release and reduced to just a deactivate_super. - The newinstance mount option continues to be accepted but is now ignored. In devpts_fs.h definitions for when !CONFIG_UNIX98_PTYS are removed as they are never used. Documentation/filesystems/devpts.txt is updated to describe the current situation. This has been verified to work properly on openwrt-15.05, centos5, centos6, centos7, debian-6.0.2, debian-7.9, debian-8.2, ubuntu-14.04.3, ubuntu-15.10, fedora23, mageia-5, mint-17.3, opensuse-42.1, slackware-14.1, gentoo-20151225 (13.0?), archlinux-2015-12-01. 
With the caveat that on centos6 and on slackware-14.1 there wind up being two instances of the devpts filesystem mounted on /dev/pts; the lower copy does not end up getting used. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Greg KH <greg@kroah.com> Cc: Peter Hurley <peter@hurleysoftware.com> Cc: Peter Anvin <hpa@zytor.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Serge Hallyn <serge.hallyn@ubuntu.com> Cc: Willy Tarreau <w@1wt.eu> Cc: Aurelien Jarno <aurelien@aurel32.net> Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk> Cc: Jann Horn <jann@thejh.net> Cc: Jiri Slaby <jslaby@suse.com> Cc: Florian Weimer <fw@deneb.enyo.de> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/devpts.txt145
-rw-r--r--drivers/tty/Kconfig11
-rw-r--r--drivers/tty/pty.c15
-rw-r--r--fs/devpts/inode.c191
-rw-r--r--fs/namei.c49
-rw-r--r--include/linux/devpts_fs.h9
-rw-r--r--include/linux/namei.h2
7 files changed, 126 insertions, 296 deletions
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
index 30d2fcb32f72..9f94fe276dea 100644
--- a/Documentation/filesystems/devpts.txt
+++ b/Documentation/filesystems/devpts.txt
@@ -1,141 +1,26 @@
1Each mount of the devpts filesystem is now distinct such that ptys
2and their indices allocated in one mount are independent from ptys
3and their indices in all other mounts.
1 4
2To support containers, we now allow multiple instances of devpts filesystem, 5All mounts of the devpts filesystem now create a /dev/pts/ptmx node
3such that indices of ptys allocated in one instance are independent of indices 6with permissions 0000.
4allocated in other instances of devpts.
5 7
6To preserve backward compatibility, this support for multiple instances is 8To retain backwards compatibility a ptmx device node (aka any node
7enabled only if: 9created with "mknod name c 5 2") when opened will look for an instance
10of devpts under the name "pts" in the same directory as the ptmx device
11node.
8 12
9 - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and 13As an option instead of placing a /dev/ptmx device node at /dev/ptmx
10 - '-o newinstance' mount option is specified while mounting devpts 14it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or
11 15to bind mount /dev/pts/ptmx to /dev/ptmx. If you opt for using
12IOW, devpts now supports both single-instance and multi-instance semantics. 16the devpts filesystem in this manner devpts should be mounted with
13 17the ptmxmode=0666, or chmod 0666 /dev/pts/ptmx should be called.
14If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
15this is referred to as the "legacy" mode. In this mode, the new mount options
16(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
17on console.
18
19If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
20'newinstance' option (as in current start-up scripts) the new mount binds
21to the initial kernel mount of devpts. This mode is referred to as the
22'single-instance' mode and the current, single-instance semantics are
23preserved, i.e PTYs are common across the system.
24
25The only difference between this single-instance mode and the legacy mode
26is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which
27can safely be ignored.
28
29If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
30the mount is considered to be in the multi-instance mode and a new instance
31of the devpts fs is created. Any ptys created in this instance are independent
32of ptys in other instances of devpts. Like in the single-instance mode, the
33/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
34open of /dev/ptmx must be redirected to '/dev/pts/ptmx' using a symlink or
35bind-mount.
36
37Eg: A container startup script could do the following:
38
39 $ chmod 0666 /dev/pts/ptmx
40 $ rm /dev/ptmx
41 $ ln -s pts/ptmx /dev/ptmx
42 $ ns_exec -cm /bin/bash
43
44 # We are now in new container
45
46 $ umount /dev/pts
47 $ mount -t devpts -o newinstance lxcpts /dev/pts
48 $ sshd -p 1234
49
50where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
51/bin/bash in the child process. A pty created by the sshd is not visible in
52the original mount of /dev/pts.
53 18
54Total count of pty pairs in all instances is limited by sysctls: 19Total count of pty pairs in all instances is limited by sysctls:
55kernel.pty.max = 4096 - global limit 20kernel.pty.max = 4096 - global limit
56kernel.pty.reserve = 1024 - reserve for initial instance 21kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace
57kernel.pty.nr - current count of ptys 22kernel.pty.nr - current count of ptys
58 23
59Per-instance limit could be set by adding mount option "max=<count>". 24Per-instance limit could be set by adding mount option "max=<count>".
60This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve. 25This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve.
61In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit. 26In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit.
62
63User-space changes
64------------------
65
66In multi-instance mode (i.e '-o newinstance' mount option is specified at least
67once), following user-space issues should be noted.
68
691. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
70 and no change is needed to system-startup scripts.
71
722. To effectively use multi-instance mode (i.e -o newinstance is specified)
73 administrators or startup scripts should "redirect" open of /dev/ptmx to
74 /dev/pts/ptmx using either a bind mount or symlink.
75
76 $ mount -t devpts -o newinstance devpts /dev/pts
77
78 followed by either
79
80 $ rm /dev/ptmx
81 $ ln -s pts/ptmx /dev/ptmx
82 $ chmod 666 /dev/pts/ptmx
83 or
84 $ mount -o bind /dev/pts/ptmx /dev/ptmx
85
863. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
87 enables better error-reporting and treats both single-instance and
88 multi-instance mounts similarly.
89
90 But this method requires that system-startup scripts set the mode of
91 /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
92 mode by, either
93
94 - adding ptmxmode mount option to devpts entry in /etc/fstab, or
95 - using 'chmod 0666 /dev/pts/ptmx'
96
974. If multi-instance mode mount is needed for containers, but the system
98 startup scripts have not yet been updated, container-startup scripts
99 should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
100 instance mounts.
101
102 Or, in general, container-startup scripts should use:
103
104 mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
105 if [ ! -L /dev/ptmx ]; then
106 mount -o bind /dev/pts/ptmx /dev/ptmx
107 fi
108
109 When all devpts mounts are multi-instance, /dev/ptmx can permanently be
110 a symlink to pts/ptmx and the bind mount can be ignored.
111
1125. A multi-instance mount that is not accompanied by the /dev/ptmx to
113 /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
114
115 mount -t devpts -o newinstance lxcpts /dev/pts
116
117 immediately followed by:
118
119 open("/dev/ptmx")
120
121 would create a pty, say /dev/pts/7, in the initial kernel mount.
122 But /dev/pts/7 would be invisible in the new mount.
123
1246. The permissions for /dev/pts/ptmx node should be specified when mounting
125 /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
126
127 mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
128
129 The permissions can later be changed as usual with 'chmod'.
130
131 chmod 666 /dev/pts/ptmx
132
1337. A mount of devpts without the 'newinstance' option results in binding to
134 initial kernel mount. This behavior while preserving legacy semantics,
135 does not provide strict isolation in a container environment. i.e by
136 mounting devpts without the 'newinstance' option, a container could
137 get visibility into the 'host' or root container's devpts.
138
139 To workaround this and have strict isolation, all mounts of devpts,
140 including the mount in the root container, should use the newinstance
141 option.
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 82c4d2e45319..95103054c0e4 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -120,17 +120,6 @@ config UNIX98_PTYS
120 All modern Linux systems use the Unix98 ptys. Say Y unless 120 All modern Linux systems use the Unix98 ptys. Say Y unless
121 you're on an embedded system and want to conserve memory. 121 you're on an embedded system and want to conserve memory.
122 122
123config DEVPTS_MULTIPLE_INSTANCES
124 bool "Support multiple instances of devpts"
125 depends on UNIX98_PTYS
126 default n
127 ---help---
128 Enable support for multiple instances of devpts filesystem.
129 If you want to have isolated PTY namespaces (eg: in containers),
130 say Y here. Otherwise, say N. If enabled, each mount of devpts
131 filesystem with the '-o newinstance' option will create an
132 independent PTY namespace.
133
134config LEGACY_PTYS 123config LEGACY_PTYS
135 bool "Legacy (BSD) PTY support" 124 bool "Legacy (BSD) PTY support"
136 default y 125 default y
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index dd4b8417e7f4..f856c4544eea 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -668,7 +668,7 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
668 else 668 else
669 fsi = tty->link->driver_data; 669 fsi = tty->link->driver_data;
670 devpts_kill_index(fsi, tty->index); 670 devpts_kill_index(fsi, tty->index);
671 devpts_put_ref(fsi); 671 devpts_release(fsi);
672} 672}
673 673
674static const struct tty_operations ptm_unix98_ops = { 674static const struct tty_operations ptm_unix98_ops = {
@@ -733,10 +733,11 @@ static int ptmx_open(struct inode *inode, struct file *filp)
733 if (retval) 733 if (retval)
734 return retval; 734 return retval;
735 735
736 fsi = devpts_get_ref(inode, filp); 736 fsi = devpts_acquire(filp);
737 retval = -ENODEV; 737 if (IS_ERR(fsi)) {
738 if (!fsi) 738 retval = PTR_ERR(fsi);
739 goto out_free_file; 739 goto out_free_file;
740 }
740 741
741 /* find a device that is not in use. */ 742 /* find a device that is not in use. */
742 mutex_lock(&devpts_mutex); 743 mutex_lock(&devpts_mutex);
@@ -745,7 +746,7 @@ static int ptmx_open(struct inode *inode, struct file *filp)
745 746
746 retval = index; 747 retval = index;
747 if (index < 0) 748 if (index < 0)
748 goto out_put_ref; 749 goto out_put_fsi;
749 750
750 751
751 mutex_lock(&tty_mutex); 752 mutex_lock(&tty_mutex);
@@ -789,8 +790,8 @@ err_release:
789 return retval; 790 return retval;
790out: 791out:
791 devpts_kill_index(fsi, index); 792 devpts_kill_index(fsi, index);
792out_put_ref: 793out_put_fsi:
793 devpts_put_ref(fsi); 794 devpts_release(fsi);
794out_free_file: 795out_free_file:
795 tty_free_file(filp); 796 tty_free_file(filp);
796 return retval; 797 return retval;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0b2954d7172d..37c134a132c7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = {
95 95
96static DEFINE_MUTEX(allocated_ptys_lock); 96static DEFINE_MUTEX(allocated_ptys_lock);
97 97
98static struct vfsmount *devpts_mnt;
99
100struct pts_mount_opts { 98struct pts_mount_opts {
101 int setuid; 99 int setuid;
102 int setgid; 100 int setgid;
@@ -104,7 +102,7 @@ struct pts_mount_opts {
104 kgid_t gid; 102 kgid_t gid;
105 umode_t mode; 103 umode_t mode;
106 umode_t ptmxmode; 104 umode_t ptmxmode;
107 int newinstance; 105 int reserve;
108 int max; 106 int max;
109}; 107};
110 108
@@ -117,11 +115,9 @@ static const match_table_t tokens = {
117 {Opt_uid, "uid=%u"}, 115 {Opt_uid, "uid=%u"},
118 {Opt_gid, "gid=%u"}, 116 {Opt_gid, "gid=%u"},
119 {Opt_mode, "mode=%o"}, 117 {Opt_mode, "mode=%o"},
120#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
121 {Opt_ptmxmode, "ptmxmode=%o"}, 118 {Opt_ptmxmode, "ptmxmode=%o"},
122 {Opt_newinstance, "newinstance"}, 119 {Opt_newinstance, "newinstance"},
123 {Opt_max, "max=%d"}, 120 {Opt_max, "max=%d"},
124#endif
125 {Opt_err, NULL} 121 {Opt_err, NULL}
126}; 122};
127 123
@@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
137 return sb->s_fs_info; 133 return sb->s_fs_info;
138} 134}
139 135
140static inline struct super_block *pts_sb_from_inode(struct inode *inode) 136struct pts_fs_info *devpts_acquire(struct file *filp)
141{ 137{
142#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 138 struct pts_fs_info *result;
143 if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) 139 struct path path;
144 return inode->i_sb; 140 struct super_block *sb;
145#endif 141 int err;
146 if (!devpts_mnt) 142
147 return NULL; 143 path = filp->f_path;
148 return devpts_mnt->mnt_sb; 144 path_get(&path);
145
146 /* Has the devpts filesystem already been found? */
147 sb = path.mnt->mnt_sb;
148 if (sb->s_magic != DEVPTS_SUPER_MAGIC) {
149 /* Is a devpts filesystem at "pts" in the same directory? */
150 err = path_pts(&path);
151 if (err) {
152 result = ERR_PTR(err);
153 goto out;
154 }
155
156 /* Is the path the root of a devpts filesystem? */
157 result = ERR_PTR(-ENODEV);
158 sb = path.mnt->mnt_sb;
159 if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
160 (path.mnt->mnt_root != sb->s_root))
161 goto out;
162 }
163
164 /*
165 * pty code needs to hold extra references in case of last /dev/tty close
166 */
167 atomic_inc(&sb->s_active);
168 result = DEVPTS_SB(sb);
169
170out:
171 path_put(&path);
172 return result;
173}
174
175void devpts_release(struct pts_fs_info *fsi)
176{
177 deactivate_super(fsi->sb);
149} 178}
150 179
151#define PARSE_MOUNT 0 180#define PARSE_MOUNT 0
@@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
154/* 183/*
155 * parse_mount_options(): 184 * parse_mount_options():
156 * Set @opts to mount options specified in @data. If an option is not 185 * Set @opts to mount options specified in @data. If an option is not
157 * specified in @data, set it to its default value. The exception is 186 * specified in @data, set it to its default value.
158 * 'newinstance' option which can only be set/cleared on a mount (i.e.
159 * cannot be changed during remount).
160 * 187 *
161 * Note: @data may be NULL (in which case all options are set to default). 188 * Note: @data may be NULL (in which case all options are set to default).
162 */ 189 */
@@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
174 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; 201 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
175 opts->max = NR_UNIX98_PTY_MAX; 202 opts->max = NR_UNIX98_PTY_MAX;
176 203
177 /* newinstance makes sense only on initial mount */ 204 /* Only allow instances mounted from the initial mount
205 * namespace to tap the reserve pool of ptys.
206 */
178 if (op == PARSE_MOUNT) 207 if (op == PARSE_MOUNT)
179 opts->newinstance = 0; 208 opts->reserve =
209 (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns);
180 210
181 while ((p = strsep(&data, ",")) != NULL) { 211 while ((p = strsep(&data, ",")) != NULL) {
182 substring_t args[MAX_OPT_ARGS]; 212 substring_t args[MAX_OPT_ARGS];
@@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
211 return -EINVAL; 241 return -EINVAL;
212 opts->mode = option & S_IALLUGO; 242 opts->mode = option & S_IALLUGO;
213 break; 243 break;
214#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
215 case Opt_ptmxmode: 244 case Opt_ptmxmode:
216 if (match_octal(&args[0], &option)) 245 if (match_octal(&args[0], &option))
217 return -EINVAL; 246 return -EINVAL;
218 opts->ptmxmode = option & S_IALLUGO; 247 opts->ptmxmode = option & S_IALLUGO;
219 break; 248 break;
220 case Opt_newinstance: 249 case Opt_newinstance:
221 /* newinstance makes sense only on initial mount */
222 if (op == PARSE_MOUNT)
223 opts->newinstance = 1;
224 break; 250 break;
225 case Opt_max: 251 case Opt_max:
226 if (match_int(&args[0], &option) || 252 if (match_int(&args[0], &option) ||
@@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
228 return -EINVAL; 254 return -EINVAL;
229 opts->max = option; 255 opts->max = option;
230 break; 256 break;
231#endif
232 default: 257 default:
233 pr_err("called with bogus options\n"); 258 pr_err("called with bogus options\n");
234 return -EINVAL; 259 return -EINVAL;
@@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
238 return 0; 263 return 0;
239} 264}
240 265
241#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
242static int mknod_ptmx(struct super_block *sb) 266static int mknod_ptmx(struct super_block *sb)
243{ 267{
244 int mode; 268 int mode;
@@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
305 inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; 329 inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
306 } 330 }
307} 331}
308#else
309static inline void update_ptmx_mode(struct pts_fs_info *fsi)
310{
311 return;
312}
313#endif
314 332
315static int devpts_remount(struct super_block *sb, int *flags, char *data) 333static int devpts_remount(struct super_block *sb, int *flags, char *data)
316{ 334{
@@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
344 seq_printf(seq, ",gid=%u", 362 seq_printf(seq, ",gid=%u",
345 from_kgid_munged(&init_user_ns, opts->gid)); 363 from_kgid_munged(&init_user_ns, opts->gid));
346 seq_printf(seq, ",mode=%03o", opts->mode); 364 seq_printf(seq, ",mode=%03o", opts->mode);
347#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
348 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); 365 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
349 if (opts->max < NR_UNIX98_PTY_MAX) 366 if (opts->max < NR_UNIX98_PTY_MAX)
350 seq_printf(seq, ",max=%d", opts->max); 367 seq_printf(seq, ",max=%d", opts->max);
351#endif
352 368
353 return 0; 369 return 0;
354} 370}
@@ -410,40 +426,11 @@ fail:
410 return -ENOMEM; 426 return -ENOMEM;
411} 427}
412 428
413#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
414static int compare_init_pts_sb(struct super_block *s, void *p)
415{
416 if (devpts_mnt)
417 return devpts_mnt->mnt_sb == s;
418 return 0;
419}
420
421/* 429/*
422 * devpts_mount() 430 * devpts_mount()
423 * 431 *
424 * If the '-o newinstance' mount option was specified, mount a new 432 * Mount a new (private) instance of devpts. PTYs created in this
425 * (private) instance of devpts. PTYs created in this instance are 433 * instance are independent of the PTYs in other devpts instances.
426 * independent of the PTYs in other devpts instances.
427 *
428 * If the '-o newinstance' option was not specified, mount/remount the
429 * initial kernel mount of devpts. This type of mount gives the
430 * legacy, single-instance semantics.
431 *
432 * The 'newinstance' option is needed to support multiple namespace
433 * semantics in devpts while preserving backward compatibility of the
434 * current 'single-namespace' semantics. i.e all mounts of devpts
435 * without the 'newinstance' mount option should bind to the initial
436 * kernel mount, like mount_single().
437 *
438 * Mounts with 'newinstance' option create a new, private namespace.
439 *
440 * NOTE:
441 *
442 * For single-mount semantics, devpts cannot use mount_single(),
443 * because mount_single()/sget() find and use the super-block from
444 * the most recent mount of devpts. But that recent mount may be a
445 * 'newinstance' mount and mount_single() would pick the newinstance
446 * super-block instead of the initial super-block.
447 */ 434 */
448static struct dentry *devpts_mount(struct file_system_type *fs_type, 435static struct dentry *devpts_mount(struct file_system_type *fs_type,
449 int flags, const char *dev_name, void *data) 436 int flags, const char *dev_name, void *data)
@@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
456 if (error) 443 if (error)
457 return ERR_PTR(error); 444 return ERR_PTR(error);
458 445
459 /* Require newinstance for all user namespace mounts to ensure 446 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
460 * the mount options are not changed.
461 */
462 if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
463 return ERR_PTR(-EINVAL);
464
465 if (opts.newinstance)
466 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
467 else
468 s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
469 NULL);
470
471 if (IS_ERR(s)) 447 if (IS_ERR(s))
472 return ERR_CAST(s); 448 return ERR_CAST(s);
473 449
@@ -491,18 +467,6 @@ out_undo_sget:
491 return ERR_PTR(error); 467 return ERR_PTR(error);
492} 468}
493 469
494#else
495/*
496 * This supports only the legacy single-instance semantics (no
497 * multiple-instance semantics)
498 */
499static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
500 const char *dev_name, void *data)
501{
502 return mount_single(fs_type, flags, data, devpts_fill_super);
503}
504#endif
505
506static void devpts_kill_sb(struct super_block *sb) 470static void devpts_kill_sb(struct super_block *sb)
507{ 471{
508 struct pts_fs_info *fsi = DEVPTS_SB(sb); 472 struct pts_fs_info *fsi = DEVPTS_SB(sb);
@@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = {
516 .name = "devpts", 480 .name = "devpts",
517 .mount = devpts_mount, 481 .mount = devpts_mount,
518 .kill_sb = devpts_kill_sb, 482 .kill_sb = devpts_kill_sb,
519#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
520 .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, 483 .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
521#endif
522}; 484};
523 485
524/* 486/*
@@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi)
531 int index; 493 int index;
532 int ida_ret; 494 int ida_ret;
533 495
534 if (!fsi)
535 return -ENODEV;
536
537retry: 496retry:
538 if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) 497 if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
539 return -ENOMEM; 498 return -ENOMEM;
540 499
541 mutex_lock(&allocated_ptys_lock); 500 mutex_lock(&allocated_ptys_lock);
542 if (pty_count >= pty_limit - 501 if (pty_count >= (pty_limit -
543 (fsi->mount_opts.newinstance ? pty_reserve : 0)) { 502 (fsi->mount_opts.reserve ? 0 : pty_reserve))) {
544 mutex_unlock(&allocated_ptys_lock); 503 mutex_unlock(&allocated_ptys_lock);
545 return -ENOSPC; 504 return -ENOSPC;
546 } 505 }
@@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
571 mutex_unlock(&allocated_ptys_lock); 530 mutex_unlock(&allocated_ptys_lock);
572} 531}
573 532
574/*
575 * pty code needs to hold extra references in case of last /dev/tty close
576 */
577struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
578{
579 struct super_block *sb;
580 struct pts_fs_info *fsi;
581
582 sb = pts_sb_from_inode(ptmx_inode);
583 if (!sb)
584 return NULL;
585 fsi = DEVPTS_SB(sb);
586 if (!fsi)
587 return NULL;
588
589 atomic_inc(&sb->s_active);
590 return fsi;
591}
592
593void devpts_put_ref(struct pts_fs_info *fsi)
594{
595 deactivate_super(fsi->sb);
596}
597
598/** 533/**
599 * devpts_pty_new -- create a new inode in /dev/pts/ 534 * devpts_pty_new -- create a new inode in /dev/pts/
600 * @ptmx_inode: inode of the master 535 * @ptmx_inode: inode of the master
@@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi)
607struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) 542struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
608{ 543{
609 struct dentry *dentry; 544 struct dentry *dentry;
610 struct super_block *sb; 545 struct super_block *sb = fsi->sb;
611 struct inode *inode; 546 struct inode *inode;
612 struct dentry *root; 547 struct dentry *root;
613 struct pts_mount_opts *opts; 548 struct pts_mount_opts *opts;
614 char s[12]; 549 char s[12];
615 550
616 if (!fsi)
617 return ERR_PTR(-ENODEV);
618
619 sb = fsi->sb;
620 root = sb->s_root; 551 root = sb->s_root;
621 opts = &fsi->mount_opts; 552 opts = &fsi->mount_opts;
622 553
@@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry)
676static int __init init_devpts_fs(void) 607static int __init init_devpts_fs(void)
677{ 608{
678 int err = register_filesystem(&devpts_fs_type); 609 int err = register_filesystem(&devpts_fs_type);
679 struct ctl_table_header *table;
680
681 if (!err) { 610 if (!err) {
682 struct vfsmount *mnt; 611 register_sysctl_table(pty_root_table);
683
684 table = register_sysctl_table(pty_root_table);
685 mnt = kern_mount(&devpts_fs_type);
686 if (IS_ERR(mnt)) {
687 err = PTR_ERR(mnt);
688 unregister_filesystem(&devpts_fs_type);
689 unregister_sysctl_table(table);
690 } else {
691 devpts_mnt = mnt;
692 }
693 } 612 }
694 return err; 613 return err;
695} 614}
diff --git a/fs/namei.c b/fs/namei.c
index 4c4f95ac8aa5..6a82fb7e2127 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path)
1416 } 1416 }
1417} 1417}
1418 1418
1419static int path_parent_directory(struct path *path)
1420{
1421 struct dentry *old = path->dentry;
1422 /* rare case of legitimate dget_parent()... */
1423 path->dentry = dget_parent(path->dentry);
1424 dput(old);
1425 if (unlikely(!path_connected(path)))
1426 return -ENOENT;
1427 return 0;
1428}
1429
1419static int follow_dotdot(struct nameidata *nd) 1430static int follow_dotdot(struct nameidata *nd)
1420{ 1431{
1421 while(1) { 1432 while(1) {
1422 struct dentry *old = nd->path.dentry;
1423
1424 if (nd->path.dentry == nd->root.dentry && 1433 if (nd->path.dentry == nd->root.dentry &&
1425 nd->path.mnt == nd->root.mnt) { 1434 nd->path.mnt == nd->root.mnt) {
1426 break; 1435 break;
1427 } 1436 }
1428 if (nd->path.dentry != nd->path.mnt->mnt_root) { 1437 if (nd->path.dentry != nd->path.mnt->mnt_root) {
1429 /* rare case of legitimate dget_parent()... */ 1438 int ret = path_parent_directory(&nd->path);
1430 nd->path.dentry = dget_parent(nd->path.dentry); 1439 if (ret)
1431 dput(old); 1440 return ret;
1432 if (unlikely(!path_connected(&nd->path)))
1433 return -ENOENT;
1434 break; 1441 break;
1435 } 1442 }
1436 if (!follow_up(&nd->path)) 1443 if (!follow_up(&nd->path))
@@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name,
2514} 2521}
2515EXPORT_SYMBOL(lookup_one_len_unlocked); 2522EXPORT_SYMBOL(lookup_one_len_unlocked);
2516 2523
2524#ifdef CONFIG_UNIX98_PTYS
2525int path_pts(struct path *path)
2526{
2527 /* Find something mounted on "pts" in the same directory as
2528 * the input path.
2529 */
2530 struct dentry *child, *parent;
2531 struct qstr this;
2532 int ret;
2533
2534 ret = path_parent_directory(path);
2535 if (ret)
2536 return ret;
2537
2538 parent = path->dentry;
2539 this.name = "pts";
2540 this.len = 3;
2541 child = d_hash_and_lookup(parent, &this);
2542 if (!child)
2543 return -ENOENT;
2544
2545 path->dentry = child;
2546 dput(parent);
2547 follow_mount(path);
2548 return 0;
2549}
2550#endif
2551
2517int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2552int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
2518 struct path *path, int *empty) 2553 struct path *path, int *empty)
2519{ 2554{
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 5871f292b596..277ab9af9ac2 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -15,13 +15,12 @@
15 15
16#include <linux/errno.h> 16#include <linux/errno.h>
17 17
18struct pts_fs_info;
19
20#ifdef CONFIG_UNIX98_PTYS 18#ifdef CONFIG_UNIX98_PTYS
21 19
22/* Look up a pts fs info and get a ref to it */ 20struct pts_fs_info;
23struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); 21
24void devpts_put_ref(struct pts_fs_info *); 22struct pts_fs_info *devpts_acquire(struct file *);
23void devpts_release(struct pts_fs_info *);
25 24
26int devpts_new_index(struct pts_fs_info *); 25int devpts_new_index(struct pts_fs_info *);
27void devpts_kill_index(struct pts_fs_info *, int); 26void devpts_kill_index(struct pts_fs_info *, int);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index ec5ec2818a28..d3d0398f2a1b 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
45#define LOOKUP_ROOT 0x2000 45#define LOOKUP_ROOT 0x2000
46#define LOOKUP_EMPTY 0x4000 46#define LOOKUP_EMPTY 0x4000
47 47
48extern int path_pts(struct path *path);
49
48extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); 50extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
49 51
50static inline int user_path_at(int dfd, const char __user *name, unsigned flags, 52static inline int user_path_at(int dfd, const char __user *name, unsigned flags,