summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl 3
-rw-r--r-- arch/x86/entry/syscalls/syscall_64.tbl 1
-rw-r--r-- fs/file_table.c 9
-rw-r--r-- fs/internal.h 1
-rw-r--r-- fs/namespace.c 157
-rw-r--r-- include/linux/fs.h 7
-rw-r--r-- include/linux/syscalls.h 1
-rw-r--r-- include/uapi/linux/fcntl.h 2
-rw-r--r-- include/uapi/linux/mount.h 6
9 files changed, 159 insertions, 28 deletions
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 1f9607ed087c..ae2294d07ecb 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,7 +398,8 @@
398384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 398384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
399385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 399385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
400386 i386 rseq sys_rseq __ia32_sys_rseq 400386 i386 rseq sys_rseq __ia32_sys_rseq
401# don't use numbers 387 through 392, add new calls at the end 401387 i386 open_tree sys_open_tree __ia32_sys_open_tree
402# don't use numbers 388 through 392, add new calls at the end
402393 i386 semget sys_semget __ia32_sys_semget 403393 i386 semget sys_semget __ia32_sys_semget
403394 i386 semctl sys_semctl __ia32_compat_sys_semctl 404394 i386 semctl sys_semctl __ia32_compat_sys_semctl
404395 i386 shmget sys_shmget __ia32_sys_shmget 405395 i386 shmget sys_shmget __ia32_sys_shmget
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 92ee0b4378d4..a6e06c35b5b1 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,7 @@
343332 common statx __x64_sys_statx 343332 common statx __x64_sys_statx
344333 common io_pgetevents __x64_sys_io_pgetevents 344333 common io_pgetevents __x64_sys_io_pgetevents
345334 common rseq __x64_sys_rseq 345334 common rseq __x64_sys_rseq
346335 common open_tree __x64_sys_open_tree
346# don't use numbers 387 through 423, add new calls after the last 347# don't use numbers 387 through 423, add new calls after the last
347# 'common' entry 348# 'common' entry
348424 common pidfd_send_signal __x64_sys_pidfd_send_signal 349424 common pidfd_send_signal __x64_sys_pidfd_send_signal
diff --git a/fs/file_table.c b/fs/file_table.c
index 155d7514a094..3f9c1b452c1d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -255,6 +255,7 @@ static void __fput(struct file *file)
255 struct dentry *dentry = file->f_path.dentry; 255 struct dentry *dentry = file->f_path.dentry;
256 struct vfsmount *mnt = file->f_path.mnt; 256 struct vfsmount *mnt = file->f_path.mnt;
257 struct inode *inode = file->f_inode; 257 struct inode *inode = file->f_inode;
258 fmode_t mode = file->f_mode;
258 259
259 if (unlikely(!(file->f_mode & FMODE_OPENED))) 260 if (unlikely(!(file->f_mode & FMODE_OPENED)))
260 goto out; 261 goto out;
@@ -277,18 +278,20 @@ static void __fput(struct file *file)
277 if (file->f_op->release) 278 if (file->f_op->release)
278 file->f_op->release(inode, file); 279 file->f_op->release(inode, file);
279 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && 280 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
280 !(file->f_mode & FMODE_PATH))) { 281 !(mode & FMODE_PATH))) {
281 cdev_put(inode->i_cdev); 282 cdev_put(inode->i_cdev);
282 } 283 }
283 fops_put(file->f_op); 284 fops_put(file->f_op);
284 put_pid(file->f_owner.pid); 285 put_pid(file->f_owner.pid);
285 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 286 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
286 i_readcount_dec(inode); 287 i_readcount_dec(inode);
287 if (file->f_mode & FMODE_WRITER) { 288 if (mode & FMODE_WRITER) {
288 put_write_access(inode); 289 put_write_access(inode);
289 __mnt_drop_write(mnt); 290 __mnt_drop_write(mnt);
290 } 291 }
291 dput(dentry); 292 dput(dentry);
293 if (unlikely(mode & FMODE_NEED_UNMOUNT))
294 dissolve_on_fput(mnt);
292 mntput(mnt); 295 mntput(mnt);
293out: 296out:
294 file_free(file); 297 file_free(file);
diff --git a/fs/internal.h b/fs/internal.h
index 6a8b71643af4..f3a027c44758 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -94,6 +94,7 @@ extern int __mnt_want_write_file(struct file *);
94extern void __mnt_drop_write(struct vfsmount *); 94extern void __mnt_drop_write(struct vfsmount *);
95extern void __mnt_drop_write_file(struct file *); 95extern void __mnt_drop_write_file(struct file *);
96 96
97extern void dissolve_on_fput(struct vfsmount *);
97/* 98/*
98 * fs_struct.c 99 * fs_struct.c
99 */ 100 */
diff --git a/fs/namespace.c b/fs/namespace.c
index c9cab307fa77..b804a1a497ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> /* init_rootfs */ 20#include <linux/init.h> /* init_rootfs */
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
23#include <linux/file.h>
23#include <linux/uaccess.h> 24#include <linux/uaccess.h>
24#include <linux/proc_ns.h> 25#include <linux/proc_ns.h>
25#include <linux/magic.h> 26#include <linux/magic.h>
@@ -1832,6 +1833,21 @@ struct vfsmount *collect_mounts(const struct path *path)
1832 return &tree->mnt; 1833 return &tree->mnt;
1833} 1834}
1834 1835
1836static void free_mnt_ns(struct mnt_namespace *);
1837static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1838
1839void dissolve_on_fput(struct vfsmount *mnt)
1840{
1841 struct mnt_namespace *ns;
1842 namespace_lock();
1843 lock_mount_hash();
1844 ns = real_mount(mnt)->mnt_ns;
1845 umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
1846 unlock_mount_hash();
1847 namespace_unlock();
1848 free_mnt_ns(ns);
1849}
1850
1835void drop_collected_mounts(struct vfsmount *mnt) 1851void drop_collected_mounts(struct vfsmount *mnt)
1836{ 1852{
1837 namespace_lock(); 1853 namespace_lock();
@@ -2222,6 +2238,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2222 return false; 2238 return false;
2223} 2239}
2224 2240
2241static struct mount *__do_loopback(struct path *old_path, int recurse)
2242{
2243 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2244
2245 if (IS_MNT_UNBINDABLE(old))
2246 return mnt;
2247
2248 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2249 return mnt;
2250
2251 if (!recurse && has_locked_children(old, old_path->dentry))
2252 return mnt;
2253
2254 if (recurse)
2255 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2256 else
2257 mnt = clone_mnt(old, old_path->dentry, 0);
2258
2259 if (!IS_ERR(mnt))
2260 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2261
2262 return mnt;
2263}
2264
2225/* 2265/*
2226 * do loopback mount. 2266 * do loopback mount.
2227 */ 2267 */
@@ -2229,7 +2269,7 @@ static int do_loopback(struct path *path, const char *old_name,
2229 int recurse) 2269 int recurse)
2230{ 2270{
2231 struct path old_path; 2271 struct path old_path;
2232 struct mount *mnt = NULL, *old, *parent; 2272 struct mount *mnt = NULL, *parent;
2233 struct mountpoint *mp; 2273 struct mountpoint *mp;
2234 int err; 2274 int err;
2235 if (!old_name || !*old_name) 2275 if (!old_name || !*old_name)
@@ -2243,38 +2283,21 @@ static int do_loopback(struct path *path, const char *old_name,
2243 goto out; 2283 goto out;
2244 2284
2245 mp = lock_mount(path); 2285 mp = lock_mount(path);
2246 err = PTR_ERR(mp); 2286 if (IS_ERR(mp)) {
2247 if (IS_ERR(mp)) 2287 err = PTR_ERR(mp);
2248 goto out; 2288 goto out;
2289 }
2249 2290
2250 old = real_mount(old_path.mnt);
2251 parent = real_mount(path->mnt); 2291 parent = real_mount(path->mnt);
2252
2253 err = -EINVAL;
2254 if (IS_MNT_UNBINDABLE(old))
2255 goto out2;
2256
2257 if (!check_mnt(parent)) 2292 if (!check_mnt(parent))
2258 goto out2; 2293 goto out2;
2259 2294
2260 if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) 2295 mnt = __do_loopback(&old_path, recurse);
2261 goto out2;
2262
2263 if (!recurse && has_locked_children(old, old_path.dentry))
2264 goto out2;
2265
2266 if (recurse)
2267 mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
2268 else
2269 mnt = clone_mnt(old, old_path.dentry, 0);
2270
2271 if (IS_ERR(mnt)) { 2296 if (IS_ERR(mnt)) {
2272 err = PTR_ERR(mnt); 2297 err = PTR_ERR(mnt);
2273 goto out2; 2298 goto out2;
2274 } 2299 }
2275 2300
2276 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2277
2278 err = graft_tree(mnt, parent, mp); 2301 err = graft_tree(mnt, parent, mp);
2279 if (err) { 2302 if (err) {
2280 lock_mount_hash(); 2303 lock_mount_hash();
@@ -2288,6 +2311,96 @@ out:
2288 return err; 2311 return err;
2289} 2312}
2290 2313
2314static struct file *open_detached_copy(struct path *path, bool recursive)
2315{
2316 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2317 struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
2318 struct mount *mnt, *p;
2319 struct file *file;
2320
2321 if (IS_ERR(ns))
2322 return ERR_CAST(ns);
2323
2324 namespace_lock();
2325 mnt = __do_loopback(path, recursive);
2326 if (IS_ERR(mnt)) {
2327 namespace_unlock();
2328 free_mnt_ns(ns);
2329 return ERR_CAST(mnt);
2330 }
2331
2332 lock_mount_hash();
2333 for (p = mnt; p; p = next_mnt(p, mnt)) {
2334 p->mnt_ns = ns;
2335 ns->mounts++;
2336 }
2337 ns->root = mnt;
2338 list_add_tail(&ns->list, &mnt->mnt_list);
2339 mntget(&mnt->mnt);
2340 unlock_mount_hash();
2341 namespace_unlock();
2342
2343 mntput(path->mnt);
2344 path->mnt = &mnt->mnt;
2345 file = dentry_open(path, O_PATH, current_cred());
2346 if (IS_ERR(file))
2347 dissolve_on_fput(path->mnt);
2348 else
2349 file->f_mode |= FMODE_NEED_UNMOUNT;
2350 return file;
2351}
2352
2353SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
2354{
2355 struct file *file;
2356 struct path path;
2357 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2358 bool detached = flags & OPEN_TREE_CLONE;
2359 int error;
2360 int fd;
2361
2362 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2363
2364 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2365 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2366 OPEN_TREE_CLOEXEC))
2367 return -EINVAL;
2368
2369 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2370 return -EINVAL;
2371
2372 if (flags & AT_NO_AUTOMOUNT)
2373 lookup_flags &= ~LOOKUP_AUTOMOUNT;
2374 if (flags & AT_SYMLINK_NOFOLLOW)
2375 lookup_flags &= ~LOOKUP_FOLLOW;
2376 if (flags & AT_EMPTY_PATH)
2377 lookup_flags |= LOOKUP_EMPTY;
2378
2379 if (detached && !may_mount())
2380 return -EPERM;
2381
2382 fd = get_unused_fd_flags(flags & O_CLOEXEC);
2383 if (fd < 0)
2384 return fd;
2385
2386 error = user_path_at(dfd, filename, lookup_flags, &path);
2387 if (unlikely(error)) {
2388 file = ERR_PTR(error);
2389 } else {
2390 if (detached)
2391 file = open_detached_copy(&path, flags & AT_RECURSIVE);
2392 else
2393 file = dentry_open(&path, O_PATH, current_cred());
2394 path_put(&path);
2395 }
2396 if (IS_ERR(file)) {
2397 put_unused_fd(fd);
2398 return PTR_ERR(file);
2399 }
2400 fd_install(fd, file);
2401 return fd;
2402}
2403
2291/* 2404/*
2292 * Don't allow locked mount flags to be cleared. 2405 * Don't allow locked mount flags to be cleared.
2293 * 2406 *
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b42df09b04c..09b05ec5d059 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -162,10 +162,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
162#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 162#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
163 163
164/* File is capable of returning -EAGAIN if I/O will block */ 164/* File is capable of returning -EAGAIN if I/O will block */
165#define FMODE_NOWAIT ((__force fmode_t)0x8000000) 165#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
166
167/* File represents mount that needs unmounting */
168#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000)
166 169
167/* File does not contribute to nr_files count */ 170/* File does not contribute to nr_files count */
168#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) 171#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
169 172
170/* 173/*
171 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 174 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e446806a561f..6c29d586e66b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -985,6 +985,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
985 unsigned mask, struct statx __user *buffer); 985 unsigned mask, struct statx __user *buffer);
986asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, 986asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
987 int flags, uint32_t sig); 987 int flags, uint32_t sig);
988asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
988asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, 989asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
989 siginfo_t __user *info, 990 siginfo_t __user *info,
990 unsigned int flags); 991 unsigned int flags);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index a2f8658f1c55..1d338357df8a 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -91,5 +91,7 @@
91#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ 91#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
92#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ 92#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
93 93
94#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
95
94 96
95#endif /* _UAPI_LINUX_FCNTL_H */ 97#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 3f9ec42510b0..fd7ae2e7eccf 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -55,4 +55,10 @@
55#define MS_MGC_VAL 0xC0ED0000 55#define MS_MGC_VAL 0xC0ED0000
56#define MS_MGC_MSK 0xffff0000 56#define MS_MGC_MSK 0xffff0000
57 57
58/*
59 * open_tree() flags.
60 */
61#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
62#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
63
58#endif /* _UAPI_LINUX_MOUNT_H */ 64#endif /* _UAPI_LINUX_MOUNT_H */