diff options
-rw-r--r-- | arch/x86/entry/syscalls/syscall_32.tbl | 3 | ||||
-rw-r--r-- | arch/x86/entry/syscalls/syscall_64.tbl | 1 | ||||
-rw-r--r-- | fs/file_table.c | 9 | ||||
-rw-r--r-- | fs/internal.h | 1 | ||||
-rw-r--r-- | fs/namespace.c | 157 | ||||
-rw-r--r-- | include/linux/fs.h | 7 | ||||
-rw-r--r-- | include/linux/syscalls.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/fcntl.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/mount.h | 6 |
9 files changed, 159 insertions, 28 deletions
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..ae2294d07ecb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl | |||
@@ -398,7 +398,8 @@ | |||
398 | 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl | 398 | 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl |
399 | 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents | 399 | 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents |
400 | 386 i386 rseq sys_rseq __ia32_sys_rseq | 400 | 386 i386 rseq sys_rseq __ia32_sys_rseq |
401 | # don't use numbers 387 through 392, add new calls at the end | 401 | 387 i386 open_tree sys_open_tree __ia32_sys_open_tree |
402 | # don't use numbers 388 through 392, add new calls at the end | ||
402 | 393 i386 semget sys_semget __ia32_sys_semget | 403 | 393 i386 semget sys_semget __ia32_sys_semget |
403 | 394 i386 semctl sys_semctl __ia32_compat_sys_semctl | 404 | 394 i386 semctl sys_semctl __ia32_compat_sys_semctl |
404 | 395 i386 shmget sys_shmget __ia32_sys_shmget | 405 | 395 i386 shmget sys_shmget __ia32_sys_shmget |
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..a6e06c35b5b1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl | |||
@@ -343,6 +343,7 @@ | |||
343 | 332 common statx __x64_sys_statx | 343 | 332 common statx __x64_sys_statx |
344 | 333 common io_pgetevents __x64_sys_io_pgetevents | 344 | 333 common io_pgetevents __x64_sys_io_pgetevents |
345 | 334 common rseq __x64_sys_rseq | 345 | 334 common rseq __x64_sys_rseq |
346 | 335 common open_tree __x64_sys_open_tree | ||
346 | # don't use numbers 387 through 423, add new calls after the last | 347 | # don't use numbers 387 through 423, add new calls after the last |
347 | # 'common' entry | 348 | # 'common' entry |
348 | 424 common pidfd_send_signal __x64_sys_pidfd_send_signal | 349 | 424 common pidfd_send_signal __x64_sys_pidfd_send_signal |
diff --git a/fs/file_table.c b/fs/file_table.c index 155d7514a094..3f9c1b452c1d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -255,6 +255,7 @@ static void __fput(struct file *file) | |||
255 | struct dentry *dentry = file->f_path.dentry; | 255 | struct dentry *dentry = file->f_path.dentry; |
256 | struct vfsmount *mnt = file->f_path.mnt; | 256 | struct vfsmount *mnt = file->f_path.mnt; |
257 | struct inode *inode = file->f_inode; | 257 | struct inode *inode = file->f_inode; |
258 | fmode_t mode = file->f_mode; | ||
258 | 259 | ||
259 | if (unlikely(!(file->f_mode & FMODE_OPENED))) | 260 | if (unlikely(!(file->f_mode & FMODE_OPENED))) |
260 | goto out; | 261 | goto out; |
@@ -277,18 +278,20 @@ static void __fput(struct file *file) | |||
277 | if (file->f_op->release) | 278 | if (file->f_op->release) |
278 | file->f_op->release(inode, file); | 279 | file->f_op->release(inode, file); |
279 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && | 280 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && |
280 | !(file->f_mode & FMODE_PATH))) { | 281 | !(mode & FMODE_PATH))) { |
281 | cdev_put(inode->i_cdev); | 282 | cdev_put(inode->i_cdev); |
282 | } | 283 | } |
283 | fops_put(file->f_op); | 284 | fops_put(file->f_op); |
284 | put_pid(file->f_owner.pid); | 285 | put_pid(file->f_owner.pid); |
285 | if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) | 286 | if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
286 | i_readcount_dec(inode); | 287 | i_readcount_dec(inode); |
287 | if (file->f_mode & FMODE_WRITER) { | 288 | if (mode & FMODE_WRITER) { |
288 | put_write_access(inode); | 289 | put_write_access(inode); |
289 | __mnt_drop_write(mnt); | 290 | __mnt_drop_write(mnt); |
290 | } | 291 | } |
291 | dput(dentry); | 292 | dput(dentry); |
293 | if (unlikely(mode & FMODE_NEED_UNMOUNT)) | ||
294 | dissolve_on_fput(mnt); | ||
292 | mntput(mnt); | 295 | mntput(mnt); |
293 | out: | 296 | out: |
294 | file_free(file); | 297 | file_free(file); |
diff --git a/fs/internal.h b/fs/internal.h index 6a8b71643af4..f3a027c44758 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -94,6 +94,7 @@ extern int __mnt_want_write_file(struct file *); | |||
94 | extern void __mnt_drop_write(struct vfsmount *); | 94 | extern void __mnt_drop_write(struct vfsmount *); |
95 | extern void __mnt_drop_write_file(struct file *); | 95 | extern void __mnt_drop_write_file(struct file *); |
96 | 96 | ||
97 | extern void dissolve_on_fput(struct vfsmount *); | ||
97 | /* | 98 | /* |
98 | * fs_struct.c | 99 | * fs_struct.c |
99 | */ | 100 | */ |
diff --git a/fs/namespace.c b/fs/namespace.c index c9cab307fa77..b804a1a497ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/init.h> /* init_rootfs */ | 20 | #include <linux/init.h> /* init_rootfs */ |
21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
23 | #include <linux/file.h> | ||
23 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
24 | #include <linux/proc_ns.h> | 25 | #include <linux/proc_ns.h> |
25 | #include <linux/magic.h> | 26 | #include <linux/magic.h> |
@@ -1832,6 +1833,21 @@ struct vfsmount *collect_mounts(const struct path *path) | |||
1832 | return &tree->mnt; | 1833 | return &tree->mnt; |
1833 | } | 1834 | } |
1834 | 1835 | ||
1836 | static void free_mnt_ns(struct mnt_namespace *); | ||
1837 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool); | ||
1838 | |||
/*
 * dissolve_on_fput - unmount the tree rooted at @mnt and free its namespace.
 * @mnt: root of the mount tree to tear down.
 *
 * Called from __fput() for files marked FMODE_NEED_UNMOUNT, and from
 * open_detached_copy() when opening the freshly cloned tree fails.
 *
 * The mount's mnt_ns pointer is read under namespace_lock() and the mount
 * hash lock, before umount_tree() is called; the namespace is freed only
 * after both locks have been dropped.
 *
 * NOTE(review): this frees whatever mnt_ns the mount has at this point.  It
 * presumably relies on the tree still living in the private namespace that
 * open_detached_copy() allocated for it - confirm that nothing can move the
 * tree into another namespace while the file is open.
 */
1839 | void dissolve_on_fput(struct vfsmount *mnt) | ||
1840 | { | ||
1841 | struct mnt_namespace *ns; | ||
1842 | namespace_lock(); | ||
1843 | lock_mount_hash(); | ||
1844 | ns = real_mount(mnt)->mnt_ns; | ||
1845 | umount_tree(real_mount(mnt), UMOUNT_CONNECTED); | ||
1846 | unlock_mount_hash(); | ||
1847 | namespace_unlock(); | ||
1848 | free_mnt_ns(ns); | ||
1849 | } | ||
1850 | |||
1835 | void drop_collected_mounts(struct vfsmount *mnt) | 1851 | void drop_collected_mounts(struct vfsmount *mnt) |
1836 | { | 1852 | { |
1837 | namespace_lock(); | 1853 | namespace_lock(); |
@@ -2222,6 +2238,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry) | |||
2222 | return false; | 2238 | return false; |
2223 | } | 2239 | } |
2224 | 2240 | ||
/*
 * __do_loopback - copy the mount, or the whole mount tree, at @old_path.
 * @old_path: location to copy from.
 * @recurse:  non-zero copies the tree with copy_tree(); zero clones only the
 *            single mount with clone_mnt().
 *
 * Returns the new mount, or an ERR_PTR.  -EINVAL is returned when
 *   - the source mount is unbindable,
 *   - the source fails check_mnt() and its dentry does not use
 *     ns_dentry_operations, or
 *   - a non-recursive copy is requested and the source has locked children.
 * Otherwise the result of copy_tree()/clone_mnt() is passed through.
 *
 * MNT_LOCKED is cleared on the returned mount when the copy succeeded.
 *
 * NOTE(review): presumably must be called with namespace_lock held -
 * open_detached_copy() takes it explicitly; confirm that lock_mount() gives
 * the same guarantee for do_loopback().
 */
2241 | static struct mount *__do_loopback(struct path *old_path, int recurse) | ||
2242 | { | ||
2243 | struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); | ||
2244 | |||
2245 | if (IS_MNT_UNBINDABLE(old)) | ||
2246 | return mnt; | ||
2247 | |||
2248 | if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) | ||
2249 | return mnt; | ||
2250 | |||
2251 | if (!recurse && has_locked_children(old, old_path->dentry)) | ||
2252 | return mnt; | ||
2253 | |||
2254 | if (recurse) | ||
2255 | mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); | ||
2256 | else | ||
2257 | mnt = clone_mnt(old, old_path->dentry, 0); | ||
2258 | |||
2259 | if (!IS_ERR(mnt)) | ||
2260 | mnt->mnt.mnt_flags &= ~MNT_LOCKED; | ||
2261 | |||
2262 | return mnt; | ||
2263 | } | ||
2264 | |||
2225 | /* | 2265 | /* |
2226 | * do loopback mount. | 2266 | * do loopback mount. |
2227 | */ | 2267 | */ |
@@ -2229,7 +2269,7 @@ static int do_loopback(struct path *path, const char *old_name, | |||
2229 | int recurse) | 2269 | int recurse) |
2230 | { | 2270 | { |
2231 | struct path old_path; | 2271 | struct path old_path; |
2232 | struct mount *mnt = NULL, *old, *parent; | 2272 | struct mount *mnt = NULL, *parent; |
2233 | struct mountpoint *mp; | 2273 | struct mountpoint *mp; |
2234 | int err; | 2274 | int err; |
2235 | if (!old_name || !*old_name) | 2275 | if (!old_name || !*old_name) |
@@ -2243,38 +2283,21 @@ static int do_loopback(struct path *path, const char *old_name, | |||
2243 | goto out; | 2283 | goto out; |
2244 | 2284 | ||
2245 | mp = lock_mount(path); | 2285 | mp = lock_mount(path); |
2246 | err = PTR_ERR(mp); | 2286 | if (IS_ERR(mp)) { |
2247 | if (IS_ERR(mp)) | 2287 | err = PTR_ERR(mp); |
2248 | goto out; | 2288 | goto out; |
2289 | } | ||
2249 | 2290 | ||
2250 | old = real_mount(old_path.mnt); | ||
2251 | parent = real_mount(path->mnt); | 2291 | parent = real_mount(path->mnt); |
2252 | |||
2253 | err = -EINVAL; | ||
2254 | if (IS_MNT_UNBINDABLE(old)) | ||
2255 | goto out2; | ||
2256 | |||
2257 | if (!check_mnt(parent)) | 2292 | if (!check_mnt(parent)) |
2258 | goto out2; | 2293 | goto out2; |
2259 | 2294 | ||
2260 | if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) | 2295 | mnt = __do_loopback(&old_path, recurse); |
2261 | goto out2; | ||
2262 | |||
2263 | if (!recurse && has_locked_children(old, old_path.dentry)) | ||
2264 | goto out2; | ||
2265 | |||
2266 | if (recurse) | ||
2267 | mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); | ||
2268 | else | ||
2269 | mnt = clone_mnt(old, old_path.dentry, 0); | ||
2270 | |||
2271 | if (IS_ERR(mnt)) { | 2296 | if (IS_ERR(mnt)) { |
2272 | err = PTR_ERR(mnt); | 2297 | err = PTR_ERR(mnt); |
2273 | goto out2; | 2298 | goto out2; |
2274 | } | 2299 | } |
2275 | 2300 | ||
2276 | mnt->mnt.mnt_flags &= ~MNT_LOCKED; | ||
2277 | |||
2278 | err = graft_tree(mnt, parent, mp); | 2301 | err = graft_tree(mnt, parent, mp); |
2279 | if (err) { | 2302 | if (err) { |
2280 | lock_mount_hash(); | 2303 | lock_mount_hash(); |
@@ -2288,6 +2311,96 @@ out: | |||
2288 | return err; | 2311 | return err; |
2289 | } | 2312 | } |
2290 | 2313 | ||
/*
 * open_detached_copy - clone the mount (tree) at @path into a namespace of
 * its own and open an O_PATH file on the clone.
 * @path:      location to clone.  On success of the copy, @path->mnt is
 *             replaced by the cloned mount; @path->dentry is left untouched.
 * @recursive: passed to __do_loopback(); true copies the whole subtree.
 *
 * A new mnt_namespace is allocated with alloc_mnt_ns() using the user
 * namespace of the caller's current mount namespace.  The copy is made under
 * namespace_lock(); every mount in the copy is then assigned to the new
 * namespace under the mount hash lock.  The copy is never grafted onto a
 * mountpoint here.
 *
 * Returns the opened file, marked FMODE_NEED_UNMOUNT so that __fput() calls
 * dissolve_on_fput() on it, or an ERR_PTR.  If the namespace allocation or
 * the copy fails, nothing is left behind; if dentry_open() fails, the copy
 * is torn down again with dissolve_on_fput().
 */
2314 | static struct file *open_detached_copy(struct path *path, bool recursive) | ||
2315 | { | ||
2316 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | ||
2317 | struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); | ||
2318 | struct mount *mnt, *p; | ||
2319 | struct file *file; | ||
2320 | |||
2321 | if (IS_ERR(ns)) | ||
2322 | return ERR_CAST(ns); | ||
2323 | |||
2324 | namespace_lock(); | ||
2325 | mnt = __do_loopback(path, recursive); | ||
2326 | if (IS_ERR(mnt)) { | ||
2327 | namespace_unlock(); | ||
2328 | free_mnt_ns(ns); | ||
2329 | return ERR_CAST(mnt); | ||
2330 | } | ||
2331 | |||
2332 | lock_mount_hash(); | ||
/* Hand every mount of the copy over to the new namespace and count it. */
2333 | for (p = mnt; p; p = next_mnt(p, mnt)) { | ||
2334 | p->mnt_ns = ns; | ||
2335 | ns->mounts++; | ||
2336 | } | ||
2337 | ns->root = mnt; | ||
2338 | list_add_tail(&ns->list, &mnt->mnt_list); | ||
/* Extra reference; presumably the one @path carries from here on. */
2339 | mntget(&mnt->mnt); | ||
2340 | unlock_mount_hash(); | ||
2341 | namespace_unlock(); | ||
2342 | |||
/*
 * Drop the reference on the original mount and point @path at the copy;
 * the open_tree() caller later releases @path with path_put().
 */
2343 | mntput(path->mnt); | ||
2344 | path->mnt = &mnt->mnt; | ||
2345 | file = dentry_open(path, O_PATH, current_cred()); | ||
2346 | if (IS_ERR(file)) | ||
2347 | dissolve_on_fput(path->mnt); | ||
2348 | else | ||
2349 | file->f_mode |= FMODE_NEED_UNMOUNT; | ||
2350 | return file; | ||
2351 | } | ||
2352 | |||
/*
 * open_tree - open an O_PATH file on a location, optionally on a detached
 * clone of the mount (tree) found there.
 * @dfd:      directory file descriptor for the lookup, as for user_path_at().
 * @filename: userspace pointer to the pathname.
 * @flags:    any of AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_RECURSIVE,
 *            AT_SYMLINK_NOFOLLOW, OPEN_TREE_CLONE and OPEN_TREE_CLOEXEC.
 *
 * Without OPEN_TREE_CLONE the path is simply opened with O_PATH.  With it,
 * the caller must pass may_mount() and the file refers to a copy made by
 * open_detached_copy(); AT_RECURSIVE then selects a copy of the whole
 * subtree and is rejected when OPEN_TREE_CLONE is not set.
 *
 * The descriptor is reserved before the lookup and released again with
 * put_unused_fd() if the lookup or the open fails.
 *
 * Returns the new file descriptor, or a negative errno: -EINVAL for unknown
 * flags or AT_RECURSIVE without OPEN_TREE_CLONE, -EPERM when cloning is not
 * permitted, otherwise the error from get_unused_fd_flags(), user_path_at()
 * or the open itself.
 *
 * @filename is annotated __user to match the sys_open_tree() prototype in
 * include/linux/syscalls.h, since it is a userspace pointer.
 */
2353 | SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) | ||
2354 | { | ||
2355 | struct file *file; | ||
2356 | struct path path; | ||
2357 | int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; | ||
2358 | bool detached = flags & OPEN_TREE_CLONE; | ||
2359 | int error; | ||
2360 | int fd; | ||
2361 | |||
/* OPEN_TREE_CLOEXEC is tested below through O_CLOEXEC, so they must match. */
2362 | BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); | ||
2363 | |||
2364 | if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | | ||
2365 | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | | ||
2366 | OPEN_TREE_CLOEXEC)) | ||
2367 | return -EINVAL; | ||
2368 | |||
/* AT_RECURSIVE only makes sense together with OPEN_TREE_CLONE. */
2369 | if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) | ||
2370 | return -EINVAL; | ||
2371 | |||
2372 | if (flags & AT_NO_AUTOMOUNT) | ||
2373 | lookup_flags &= ~LOOKUP_AUTOMOUNT; | ||
2374 | if (flags & AT_SYMLINK_NOFOLLOW) | ||
2375 | lookup_flags &= ~LOOKUP_FOLLOW; | ||
2376 | if (flags & AT_EMPTY_PATH) | ||
2377 | lookup_flags |= LOOKUP_EMPTY; | ||
2378 | |||
2379 | if (detached && !may_mount()) | ||
2380 | return -EPERM; | ||
2381 | |||
2382 | fd = get_unused_fd_flags(flags & O_CLOEXEC); | ||
2383 | if (fd < 0) | ||
2384 | return fd; | ||
2385 | |||
2386 | error = user_path_at(dfd, filename, lookup_flags, &path); | ||
2387 | if (unlikely(error)) { | ||
2388 | file = ERR_PTR(error); | ||
2389 | } else { | ||
2390 | if (detached) | ||
2391 | file = open_detached_copy(&path, flags & AT_RECURSIVE); | ||
2392 | else | ||
2393 | file = dentry_open(&path, O_PATH, current_cred()); | ||
2394 | path_put(&path); | ||
2395 | } | ||
2396 | if (IS_ERR(file)) { | ||
2397 | put_unused_fd(fd); | ||
2398 | return PTR_ERR(file); | ||
2399 | } | ||
2400 | fd_install(fd, file); | ||
2401 | return fd; | ||
2402 | } | ||
2403 | |||
2291 | /* | 2404 | /* |
2292 | * Don't allow locked mount flags to be cleared. | 2405 | * Don't allow locked mount flags to be cleared. |
2293 | * | 2406 | * |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b42df09b04c..09b05ec5d059 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -162,10 +162,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | |||
162 | #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) | 162 | #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) |
163 | 163 | ||
164 | /* File is capable of returning -EAGAIN if I/O will block */ | 164 | /* File is capable of returning -EAGAIN if I/O will block */ |
165 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) | 165 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) |
166 | |||
167 | /* File represents mount that needs unmounting: set by open_detached_copy(), makes __fput() call dissolve_on_fput() */ | ||
168 | #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) | ||
166 | 169 | ||
167 | /* File does not contribute to nr_files count */ | 170 | /* File does not contribute to nr_files count */ |
168 | #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) | 171 | #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) |
169 | 172 | ||
170 | /* | 173 | /* |
171 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector | 174 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e446806a561f..6c29d586e66b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -985,6 +985,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, | |||
985 | unsigned mask, struct statx __user *buffer); | 985 | unsigned mask, struct statx __user *buffer); |
986 | asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, | 986 | asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, |
987 | int flags, uint32_t sig); | 987 | int flags, uint32_t sig); |
988 | asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); | ||
988 | asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, | 989 | asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, |
989 | siginfo_t __user *info, | 990 | siginfo_t __user *info, |
990 | unsigned int flags); | 991 | unsigned int flags); |
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a2f8658f1c55..1d338357df8a 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h | |||
@@ -91,5 +91,7 @@ | |||
91 | #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ | 91 | #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ |
92 | #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ | 92 | #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ |
93 | 93 | ||
94 | #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ | ||
95 | |||
94 | 96 | ||
95 | #endif /* _UAPI_LINUX_FCNTL_H */ | 97 | #endif /* _UAPI_LINUX_FCNTL_H */ |
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 3f9ec42510b0..fd7ae2e7eccf 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h | |||
@@ -55,4 +55,10 @@ | |||
55 | #define MS_MGC_VAL 0xC0ED0000 | 55 | #define MS_MGC_VAL 0xC0ED0000 |
56 | #define MS_MGC_MSK 0xffff0000 | 56 | #define MS_MGC_MSK 0xffff0000 |
57 | 57 | ||
58 | /* | ||
59 | * open_tree() flags. | ||
60 | */ | ||
61 | #define OPEN_TREE_CLONE 1 /* Clone the target tree and return an fd on the detached clone */ | ||
62 | #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ | ||
63 | |||
58 | #endif /* _UAPI_LINUX_MOUNT_H */ | 64 | #endif /* _UAPI_LINUX_MOUNT_H */ |