aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl 7
-rw-r--r-- arch/x86/entry/syscalls/syscall_64.tbl 6
-rw-r--r-- fs/Makefile 2
-rw-r--r-- fs/file_table.c 9
-rw-r--r-- fs/fs_context.c 160
-rw-r--r-- fs/fsopen.c 477
-rw-r--r-- fs/internal.h 4
-rw-r--r-- fs/namespace.c 477
-rw-r--r-- include/linux/fs.h 7
-rw-r--r-- include/linux/fs_context.h 38
-rw-r--r-- include/linux/lsm_hooks.h 6
-rw-r--r-- include/linux/module.h 6
-rw-r--r-- include/linux/security.h 7
-rw-r--r-- include/linux/syscalls.h 9
-rw-r--r-- include/uapi/linux/fcntl.h 2
-rw-r--r-- include/uapi/linux/mount.h 62
-rw-r--r-- samples/Kconfig 9
-rw-r--r-- samples/Makefile 2
-rw-r--r-- samples/vfs/Makefile (renamed from samples/statx/Makefile) 5
-rw-r--r-- samples/vfs/test-fsmount.c 133
-rw-r--r-- samples/vfs/test-statx.c (renamed from samples/statx/test-statx.c) 11
-rw-r--r-- security/security.c 5
22 files changed, 1353 insertions, 91 deletions
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 1f9607ed087c..4cd5f982b1e5 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,7 +398,12 @@
398384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 398384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
399385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 399385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
400386 i386 rseq sys_rseq __ia32_sys_rseq 400386 i386 rseq sys_rseq __ia32_sys_rseq
401# don't use numbers 387 through 392, add new calls at the end 401387 i386 open_tree sys_open_tree __ia32_sys_open_tree
402388 i386 move_mount sys_move_mount __ia32_sys_move_mount
403389 i386 fsopen sys_fsopen __ia32_sys_fsopen
404390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
405391 i386 fsmount sys_fsmount __ia32_sys_fsmount
406392 i386 fspick sys_fspick __ia32_sys_fspick
402393 i386 semget sys_semget __ia32_sys_semget 407393 i386 semget sys_semget __ia32_sys_semget
403394 i386 semctl sys_semctl __ia32_compat_sys_semctl 408394 i386 semctl sys_semctl __ia32_compat_sys_semctl
404395 i386 shmget sys_shmget __ia32_sys_shmget 409395 i386 shmget sys_shmget __ia32_sys_shmget
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 92ee0b4378d4..64ca0d06259a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,12 @@
343332 common statx __x64_sys_statx 343332 common statx __x64_sys_statx
344333 common io_pgetevents __x64_sys_io_pgetevents 344333 common io_pgetevents __x64_sys_io_pgetevents
345334 common rseq __x64_sys_rseq 345334 common rseq __x64_sys_rseq
346335 common open_tree __x64_sys_open_tree
347336 common move_mount __x64_sys_move_mount
348337 common fsopen __x64_sys_fsopen
349338 common fsconfig __x64_sys_fsconfig
350339 common fsmount __x64_sys_fsmount
351340 common fspick __x64_sys_fspick
346# don't use numbers 387 through 423, add new calls after the last 352# don't use numbers 387 through 423, add new calls after the last
347# 'common' entry 353# 'common' entry
348424 common pidfd_send_signal __x64_sys_pidfd_send_signal 354424 common pidfd_send_signal __x64_sys_pidfd_send_signal
diff --git a/fs/Makefile b/fs/Makefile
index 35945f8139e6..5a51bc2489ba 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
13 seq_file.o xattr.o libfs.o fs-writeback.o \ 13 seq_file.o xattr.o libfs.o fs-writeback.o \
14 pnode.o splice.o sync.o utimes.o d_path.o \ 14 pnode.o splice.o sync.o utimes.o d_path.o \
15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ 15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
16 fs_types.o fs_context.o fs_parser.o 16 fs_types.o fs_context.o fs_parser.o fsopen.o
17 17
18ifeq ($(CONFIG_BLOCK),y) 18ifeq ($(CONFIG_BLOCK),y)
19obj-y += buffer.o block_dev.o direct-io.o mpage.o 19obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/file_table.c b/fs/file_table.c
index 155d7514a094..3f9c1b452c1d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -255,6 +255,7 @@ static void __fput(struct file *file)
255 struct dentry *dentry = file->f_path.dentry; 255 struct dentry *dentry = file->f_path.dentry;
256 struct vfsmount *mnt = file->f_path.mnt; 256 struct vfsmount *mnt = file->f_path.mnt;
257 struct inode *inode = file->f_inode; 257 struct inode *inode = file->f_inode;
258 fmode_t mode = file->f_mode;
258 259
259 if (unlikely(!(file->f_mode & FMODE_OPENED))) 260 if (unlikely(!(file->f_mode & FMODE_OPENED)))
260 goto out; 261 goto out;
@@ -277,18 +278,20 @@ static void __fput(struct file *file)
277 if (file->f_op->release) 278 if (file->f_op->release)
278 file->f_op->release(inode, file); 279 file->f_op->release(inode, file);
279 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && 280 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
280 !(file->f_mode & FMODE_PATH))) { 281 !(mode & FMODE_PATH))) {
281 cdev_put(inode->i_cdev); 282 cdev_put(inode->i_cdev);
282 } 283 }
283 fops_put(file->f_op); 284 fops_put(file->f_op);
284 put_pid(file->f_owner.pid); 285 put_pid(file->f_owner.pid);
285 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 286 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
286 i_readcount_dec(inode); 287 i_readcount_dec(inode);
287 if (file->f_mode & FMODE_WRITER) { 288 if (mode & FMODE_WRITER) {
288 put_write_access(inode); 289 put_write_access(inode);
289 __mnt_drop_write(mnt); 290 __mnt_drop_write(mnt);
290 } 291 }
291 dput(dentry); 292 dput(dentry);
293 if (unlikely(mode & FMODE_NEED_UNMOUNT))
294 dissolve_on_fput(mnt);
292 mntput(mnt); 295 mntput(mnt);
293out: 296out:
294 file_free(file); 297 file_free(file);
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 87e3546b9a52..a47ccd5a4a78 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -11,6 +11,7 @@
11 */ 11 */
12 12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/module.h>
14#include <linux/fs_context.h> 15#include <linux/fs_context.h>
15#include <linux/fs_parser.h> 16#include <linux/fs_parser.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
@@ -23,6 +24,7 @@
23#include <linux/pid_namespace.h> 24#include <linux/pid_namespace.h>
24#include <linux/user_namespace.h> 25#include <linux/user_namespace.h>
25#include <net/net_namespace.h> 26#include <net/net_namespace.h>
27#include <asm/sections.h>
26#include "mount.h" 28#include "mount.h"
27#include "internal.h" 29#include "internal.h"
28 30
@@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
271 fc->cred = get_current_cred(); 273 fc->cred = get_current_cred();
272 fc->net_ns = get_net(current->nsproxy->net_ns); 274 fc->net_ns = get_net(current->nsproxy->net_ns);
273 275
276 mutex_init(&fc->uapi_mutex);
277
274 switch (purpose) { 278 switch (purpose) {
275 case FS_CONTEXT_FOR_MOUNT: 279 case FS_CONTEXT_FOR_MOUNT:
276 fc->user_ns = get_user_ns(fc->cred->user_ns); 280 fc->user_ns = get_user_ns(fc->cred->user_ns);
@@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
353 if (!fc) 357 if (!fc)
354 return ERR_PTR(-ENOMEM); 358 return ERR_PTR(-ENOMEM);
355 359
360 mutex_init(&fc->uapi_mutex);
361
356 fc->fs_private = NULL; 362 fc->fs_private = NULL;
357 fc->s_fs_info = NULL; 363 fc->s_fs_info = NULL;
358 fc->source = NULL; 364 fc->source = NULL;
@@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
361 get_net(fc->net_ns); 367 get_net(fc->net_ns);
362 get_user_ns(fc->user_ns); 368 get_user_ns(fc->user_ns);
363 get_cred(fc->cred); 369 get_cred(fc->cred);
370 if (fc->log)
371 refcount_inc(&fc->log->usage);
364 372
365 /* Can't call put until we've called ->dup */ 373 /* Can't call put until we've called ->dup */
366 ret = fc->ops->dup(fc, src_fc); 374 ret = fc->ops->dup(fc, src_fc);
@@ -378,7 +386,6 @@ err_fc:
378} 386}
379EXPORT_SYMBOL(vfs_dup_fs_context); 387EXPORT_SYMBOL(vfs_dup_fs_context);
380 388
381#ifdef CONFIG_PRINTK
382/** 389/**
383 * logfc - Log a message to a filesystem context 390 * logfc - Log a message to a filesystem context
384 * @fc: The filesystem context to log to. 391 * @fc: The filesystem context to log to.
@@ -386,27 +393,100 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
386 */ 393 */
387void logfc(struct fs_context *fc, const char *fmt, ...) 394void logfc(struct fs_context *fc, const char *fmt, ...)
388{ 395{
396 static const char store_failure[] = "OOM: Can't store error string";
397 struct fc_log *log = fc ? fc->log : NULL;
398 const char *p;
389 va_list va; 399 va_list va;
400 char *q;
401 u8 freeable;
390 402
391 va_start(va, fmt); 403 va_start(va, fmt);
392 404 if (!strchr(fmt, '%')) {
393 switch (fmt[0]) { 405 p = fmt;
394 case 'w': 406 goto unformatted_string;
395 vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va); 407 }
396 break; 408 if (strcmp(fmt, "%s") == 0) {
397 case 'e': 409 p = va_arg(va, const char *);
398 vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va); 410 goto unformatted_string;
399 break;
400 default:
401 vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
402 break;
403 } 411 }
404 412
405 pr_cont("\n"); 413 q = kvasprintf(GFP_KERNEL, fmt, va);
414copied_string:
415 if (!q)
416 goto store_failure;
417 freeable = 1;
418 goto store_string;
419
420unformatted_string:
421 if ((unsigned long)p >= (unsigned long)__start_rodata &&
422 (unsigned long)p < (unsigned long)__end_rodata)
423 goto const_string;
424 if (log && within_module_core((unsigned long)p, log->owner))
425 goto const_string;
426 q = kstrdup(p, GFP_KERNEL);
427 goto copied_string;
428
429store_failure:
430 p = store_failure;
431const_string:
432 q = (char *)p;
433 freeable = 0;
434store_string:
435 if (!log) {
436 switch (fmt[0]) {
437 case 'w':
438 printk(KERN_WARNING "%s\n", q + 2);
439 break;
440 case 'e':
441 printk(KERN_ERR "%s\n", q + 2);
442 break;
443 default:
444 printk(KERN_NOTICE "%s\n", q + 2);
445 break;
446 }
447 if (freeable)
448 kfree(q);
449 } else {
450 unsigned int logsize = ARRAY_SIZE(log->buffer);
451 u8 index;
452
453 index = log->head & (logsize - 1);
454 BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
455 sizeof(log->tail) != sizeof(u8));
456 if ((u8)(log->head - log->tail) == logsize) {
457 /* The buffer is full, discard the oldest message */
458 if (log->need_free & (1 << index))
459 kfree(log->buffer[index]);
460 log->tail++;
461 }
462
463 log->buffer[index] = q;
464 log->need_free &= ~(1 << index);
465 log->need_free |= freeable << index;
466 log->head++;
467 }
406 va_end(va); 468 va_end(va);
407} 469}
408EXPORT_SYMBOL(logfc); 470EXPORT_SYMBOL(logfc);
409#endif 471
472/*
473 * Free a logging structure.
474 */
475static void put_fc_log(struct fs_context *fc)
476{
477 struct fc_log *log = fc->log;
478 int i;
479
480 if (log) {
481 if (refcount_dec_and_test(&log->usage)) {
482 fc->log = NULL;
483 for (i = 0; i <= 7; i++)
484 if (log->need_free & (1 << i))
485 kfree(log->buffer[i]);
486 kfree(log);
487 }
488 }
489}
410 490
411/** 491/**
412 * put_fs_context - Dispose of a superblock configuration context. 492 * put_fs_context - Dispose of a superblock configuration context.
@@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc)
431 put_user_ns(fc->user_ns); 511 put_user_ns(fc->user_ns);
432 put_cred(fc->cred); 512 put_cred(fc->cred);
433 kfree(fc->subtype); 513 kfree(fc->subtype);
514 put_fc_log(fc);
434 put_filesystem(fc->fs_type); 515 put_filesystem(fc->fs_type);
435 kfree(fc->source); 516 kfree(fc->source);
436 kfree(fc); 517 kfree(fc);
@@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data)
640 721
641 return monolithic_mount_data(fc, data); 722 return monolithic_mount_data(fc, data);
642} 723}
724
725/*
726 * Clean up a context after performing an action on it and put it into a state
727 * from where it can be used to reconfigure a superblock.
728 *
729 * Note that here we do only the parts that can't fail; the rest is in
730 * finish_clean_context() below, and in between the two the fs_context is
731 * marked FS_CONTEXT_AWAITING_RECONF. The reason for the split is that after
732 * a successful mount or remount we need to report success to userland.
733 * Attempting a full reinit (for the sake of a possible subsequent remount)
734 * and failing to allocate memory would put us in a nasty situation.
735 * So here we only discard the old state, and reinitialization is left
736 * until we actually try to reconfigure.
737 */
738void vfs_clean_context(struct fs_context *fc)
739{
740 if (fc->need_free && fc->ops && fc->ops->free)
741 fc->ops->free(fc);
742 fc->need_free = false;
743 fc->fs_private = NULL;
744 fc->s_fs_info = NULL;
745 fc->sb_flags = 0;
746 security_free_mnt_opts(&fc->security);
747 kfree(fc->subtype);
748 fc->subtype = NULL;
749 kfree(fc->source);
750 fc->source = NULL;
751
752 fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
753 fc->phase = FS_CONTEXT_AWAITING_RECONF;
754}
755
756int finish_clean_context(struct fs_context *fc)
757{
758 int error;
759
760 if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
761 return 0;
762
763 if (fc->fs_type->init_fs_context)
764 error = fc->fs_type->init_fs_context(fc);
765 else
766 error = legacy_init_fs_context(fc);
767 if (unlikely(error)) {
768 fc->phase = FS_CONTEXT_FAILED;
769 return error;
770 }
771 fc->need_free = true;
772 fc->phase = FS_CONTEXT_RECONF_PARAMS;
773 return 0;
774}
diff --git a/fs/fsopen.c b/fs/fsopen.c
new file mode 100644
index 000000000000..3bb9c0c8cbcc
--- /dev/null
+++ b/fs/fsopen.c
@@ -0,0 +1,477 @@
1/* Filesystem access-by-fd.
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/fs_context.h>
13#include <linux/fs_parser.h>
14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/syscalls.h>
17#include <linux/security.h>
18#include <linux/anon_inodes.h>
19#include <linux/namei.h>
20#include <linux/file.h>
21#include <uapi/linux/mount.h>
22#include "internal.h"
23#include "mount.h"
24
25/*
26 * Allow the user to read back any error, warning or informational messages.
27 */
28static ssize_t fscontext_read(struct file *file,
29 char __user *_buf, size_t len, loff_t *pos)
30{
31 struct fs_context *fc = file->private_data;
32 struct fc_log *log = fc->log;
33 unsigned int logsize = ARRAY_SIZE(log->buffer);
34 ssize_t ret;
35 char *p;
36 bool need_free;
37 int index, n;
38
39 ret = mutex_lock_interruptible(&fc->uapi_mutex);
40 if (ret < 0)
41 return ret;
42
43 if (log->head == log->tail) {
44 mutex_unlock(&fc->uapi_mutex);
45 return -ENODATA;
46 }
47
48 index = log->tail & (logsize - 1);
49 p = log->buffer[index];
50 need_free = log->need_free & (1 << index);
51 log->buffer[index] = NULL;
52 log->need_free &= ~(1 << index);
53 log->tail++;
54 mutex_unlock(&fc->uapi_mutex);
55
56 ret = -EMSGSIZE;
57 n = strlen(p);
58 if (n > len)
59 goto err_free;
60 ret = -EFAULT;
61 if (copy_to_user(_buf, p, n) != 0)
62 goto err_free;
63 ret = n;
64
65err_free:
66 if (need_free)
67 kfree(p);
68 return ret;
69}
70
71static int fscontext_release(struct inode *inode, struct file *file)
72{
73 struct fs_context *fc = file->private_data;
74
75 if (fc) {
76 file->private_data = NULL;
77 put_fs_context(fc);
78 }
79 return 0;
80}
81
82const struct file_operations fscontext_fops = {
83 .read = fscontext_read,
84 .release = fscontext_release,
85 .llseek = no_llseek,
86};
87
88/*
89 * Attach a filesystem context to a file and an fd.
90 */
91static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
92{
93 int fd;
94
95 fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
96 O_RDWR | o_flags);
97 if (fd < 0)
98 put_fs_context(fc);
99 return fd;
100}
101
102static int fscontext_alloc_log(struct fs_context *fc)
103{
104 fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
105 if (!fc->log)
106 return -ENOMEM;
107 refcount_set(&fc->log->usage, 1);
108 fc->log->owner = fc->fs_type->owner;
109 return 0;
110}
111
112/*
113 * Open a filesystem by name so that it can be configured for mounting.
114 *
115 * We are allowed to specify a container in which the filesystem will be
116 * opened, thereby indicating which namespaces will be used (notably, which
117 * network namespace will be used for network filesystems).
118 */
119SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
120{
121 struct file_system_type *fs_type;
122 struct fs_context *fc;
123 const char *fs_name;
124 int ret;
125
126 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
127 return -EPERM;
128
129 if (flags & ~FSOPEN_CLOEXEC)
130 return -EINVAL;
131
132 fs_name = strndup_user(_fs_name, PAGE_SIZE);
133 if (IS_ERR(fs_name))
134 return PTR_ERR(fs_name);
135
136 fs_type = get_fs_type(fs_name);
137 kfree(fs_name);
138 if (!fs_type)
139 return -ENODEV;
140
141 fc = fs_context_for_mount(fs_type, 0);
142 put_filesystem(fs_type);
143 if (IS_ERR(fc))
144 return PTR_ERR(fc);
145
146 fc->phase = FS_CONTEXT_CREATE_PARAMS;
147
148 ret = fscontext_alloc_log(fc);
149 if (ret < 0)
150 goto err_fc;
151
152 return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
153
154err_fc:
155 put_fs_context(fc);
156 return ret;
157}
158
159/*
160 * Pick a superblock into a context for reconfiguration.
161 */
162SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
163{
164 struct fs_context *fc;
165 struct path target;
166 unsigned int lookup_flags;
167 int ret;
168
169 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
170 return -EPERM;
171
172 if ((flags & ~(FSPICK_CLOEXEC |
173 FSPICK_SYMLINK_NOFOLLOW |
174 FSPICK_NO_AUTOMOUNT |
175 FSPICK_EMPTY_PATH)) != 0)
176 return -EINVAL;
177
178 lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
179 if (flags & FSPICK_SYMLINK_NOFOLLOW)
180 lookup_flags &= ~LOOKUP_FOLLOW;
181 if (flags & FSPICK_NO_AUTOMOUNT)
182 lookup_flags &= ~LOOKUP_AUTOMOUNT;
183 if (flags & FSPICK_EMPTY_PATH)
184 lookup_flags |= LOOKUP_EMPTY;
185 ret = user_path_at(dfd, path, lookup_flags, &target);
186 if (ret < 0)
187 goto err;
188
189 ret = -EINVAL;
190 if (target.mnt->mnt_root != target.dentry)
191 goto err_path;
192
193 fc = fs_context_for_reconfigure(target.dentry, 0, 0);
194 if (IS_ERR(fc)) {
195 ret = PTR_ERR(fc);
196 goto err_path;
197 }
198
199 fc->phase = FS_CONTEXT_RECONF_PARAMS;
200
201 ret = fscontext_alloc_log(fc);
202 if (ret < 0)
203 goto err_fc;
204
205 path_put(&target);
206 return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
207
208err_fc:
209 put_fs_context(fc);
210err_path:
211 path_put(&target);
212err:
213 return ret;
214}
215
216/*
217 * Check the state and apply the configuration. Note that this function is
218 * allowed to 'steal' the value by setting param->xxx to NULL before returning.
219 */
220static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
221 struct fs_parameter *param)
222{
223 struct super_block *sb;
224 int ret;
225
226 ret = finish_clean_context(fc);
227 if (ret)
228 return ret;
229 switch (cmd) {
230 case FSCONFIG_CMD_CREATE:
231 if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
232 return -EBUSY;
233 fc->phase = FS_CONTEXT_CREATING;
234 ret = vfs_get_tree(fc);
235 if (ret)
236 break;
237 sb = fc->root->d_sb;
238 ret = security_sb_kern_mount(sb);
239 if (unlikely(ret)) {
240 fc_drop_locked(fc);
241 break;
242 }
243 up_write(&sb->s_umount);
244 fc->phase = FS_CONTEXT_AWAITING_MOUNT;
245 return 0;
246 case FSCONFIG_CMD_RECONFIGURE:
247 if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
248 return -EBUSY;
249 fc->phase = FS_CONTEXT_RECONFIGURING;
250 sb = fc->root->d_sb;
251 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
252 ret = -EPERM;
253 break;
254 }
255 down_write(&sb->s_umount);
256 ret = reconfigure_super(fc);
257 up_write(&sb->s_umount);
258 if (ret)
259 break;
260 vfs_clean_context(fc);
261 return 0;
262 default:
263 if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
264 fc->phase != FS_CONTEXT_RECONF_PARAMS)
265 return -EBUSY;
266
267 return vfs_parse_fs_param(fc, param);
268 }
269 fc->phase = FS_CONTEXT_FAILED;
270 return ret;
271}
272
273/**
274 * sys_fsconfig - Set parameters and trigger actions on a context
275 * @fd: The filesystem context to act upon
276 * @cmd: The action to take
277 * @_key: Where appropriate, the parameter key to set
278 * @_value: Where appropriate, the parameter value to set
279 * @aux: Additional information for the value
280 *
281 * This system call is used to set parameters on a context, including
282 * superblock settings, data source and security labelling.
283 *
284 * Actions include triggering the creation of a superblock and the
285 * reconfiguration of the superblock attached to the specified context.
286 *
287 * When setting a parameter, @cmd indicates the type of value being proposed
288 * and @_key indicates the parameter to be altered.
289 *
290 * @_value and @aux are used to specify the value, should a value be required:
291 *
292 * (*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be boolean
293 * in nature. The key may be prefixed with "no" to invert the
294 * setting. @_value must be NULL and @aux must be 0.
295 *
296 * (*) FSCONFIG_SET_STRING: A string value is specified. The parameter can be
297 * expecting boolean, integer, string or take a path. A conversion to an
298 * appropriate type will be attempted (which may include looking up as a
299 * path). @_value points to a NUL-terminated string and @aux must be 0.
300 *
301 * (*) FSCONFIG_SET_BINARY: A binary blob is specified. @_value points to the
302 * blob and @aux indicates its size. The parameter must be expecting a
303 * blob.
304 *
305 * (*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must be
306 * expecting a path object. @_value points to a NUL-terminated string that
307 * is the path and @aux is a file descriptor at which to start a relative
308 * lookup or AT_FDCWD.
309 *
310 * (*) FSCONFIG_SET_PATH_EMPTY: As FSCONFIG_SET_PATH, but with AT_EMPTY_PATH
311 * implied.
312 *
313 * (*) FSCONFIG_SET_FD: An open file descriptor is specified. @_value must be
314 * NULL and @aux indicates the file descriptor.
315 */
316SYSCALL_DEFINE5(fsconfig,
317 int, fd,
318 unsigned int, cmd,
319 const char __user *, _key,
320 const void __user *, _value,
321 int, aux)
322{
323 struct fs_context *fc;
324 struct fd f;
325 int ret;
326
327 struct fs_parameter param = {
328 .type = fs_value_is_undefined,
329 };
330
331 if (fd < 0)
332 return -EINVAL;
333
334 switch (cmd) {
335 case FSCONFIG_SET_FLAG:
336 if (!_key || _value || aux)
337 return -EINVAL;
338 break;
339 case FSCONFIG_SET_STRING:
340 if (!_key || !_value || aux)
341 return -EINVAL;
342 break;
343 case FSCONFIG_SET_BINARY:
344 if (!_key || !_value || aux <= 0 || aux > 1024 * 1024)
345 return -EINVAL;
346 break;
347 case FSCONFIG_SET_PATH:
348 case FSCONFIG_SET_PATH_EMPTY:
349 if (!_key || !_value || (aux != AT_FDCWD && aux < 0))
350 return -EINVAL;
351 break;
352 case FSCONFIG_SET_FD:
353 if (!_key || _value || aux < 0)
354 return -EINVAL;
355 break;
356 case FSCONFIG_CMD_CREATE:
357 case FSCONFIG_CMD_RECONFIGURE:
358 if (_key || _value || aux)
359 return -EINVAL;
360 break;
361 default:
362 return -EOPNOTSUPP;
363 }
364
365 f = fdget(fd);
366 if (!f.file)
367 return -EBADF;
368 ret = -EINVAL;
369 if (f.file->f_op != &fscontext_fops)
370 goto out_f;
371
372 fc = f.file->private_data;
373 if (fc->ops == &legacy_fs_context_ops) {
374 switch (cmd) {
375 case FSCONFIG_SET_BINARY:
376 case FSCONFIG_SET_PATH:
377 case FSCONFIG_SET_PATH_EMPTY:
378 case FSCONFIG_SET_FD:
379 ret = -EOPNOTSUPP;
380 goto out_f;
381 }
382 }
383
384 if (_key) {
385 param.key = strndup_user(_key, 256);
386 if (IS_ERR(param.key)) {
387 ret = PTR_ERR(param.key);
388 goto out_f;
389 }
390 }
391
392 switch (cmd) {
393 case FSCONFIG_SET_FLAG:
394 param.type = fs_value_is_flag;
395 break;
396 case FSCONFIG_SET_STRING:
397 param.type = fs_value_is_string;
398 param.string = strndup_user(_value, 256);
399 if (IS_ERR(param.string)) {
400 ret = PTR_ERR(param.string);
401 goto out_key;
402 }
403 param.size = strlen(param.string);
404 break;
405 case FSCONFIG_SET_BINARY:
406 param.type = fs_value_is_blob;
407 param.size = aux;
408 param.blob = memdup_user_nul(_value, aux);
409 if (IS_ERR(param.blob)) {
410 ret = PTR_ERR(param.blob);
411 goto out_key;
412 }
413 break;
414 case FSCONFIG_SET_PATH:
415 param.type = fs_value_is_filename;
416 param.name = getname_flags(_value, 0, NULL);
417 if (IS_ERR(param.name)) {
418 ret = PTR_ERR(param.name);
419 goto out_key;
420 }
421 param.dirfd = aux;
422 param.size = strlen(param.name->name);
423 break;
424 case FSCONFIG_SET_PATH_EMPTY:
425 param.type = fs_value_is_filename_empty;
426 param.name = getname_flags(_value, LOOKUP_EMPTY, NULL);
427 if (IS_ERR(param.name)) {
428 ret = PTR_ERR(param.name);
429 goto out_key;
430 }
431 param.dirfd = aux;
432 param.size = strlen(param.name->name);
433 break;
434 case FSCONFIG_SET_FD:
435 param.type = fs_value_is_file;
436 ret = -EBADF;
437 param.file = fget(aux);
438 if (!param.file)
439 goto out_key;
440 break;
441 default:
442 break;
443 }
444
445 ret = mutex_lock_interruptible(&fc->uapi_mutex);
446 if (ret == 0) {
447 ret = vfs_fsconfig_locked(fc, cmd, &param);
448 mutex_unlock(&fc->uapi_mutex);
449 }
450
451 /* Clean up our record of any value that we obtained from
452 * userspace. Note that the value may have been stolen by the LSM or
453 * filesystem, in which case the value pointer will have been cleared.
454 */
455 switch (cmd) {
456 case FSCONFIG_SET_STRING:
457 case FSCONFIG_SET_BINARY:
458 kfree(param.string);
459 break;
460 case FSCONFIG_SET_PATH:
461 case FSCONFIG_SET_PATH_EMPTY:
462 if (param.name)
463 putname(param.name);
464 break;
465 case FSCONFIG_SET_FD:
466 if (param.file)
467 fput(param.file);
468 break;
469 default:
470 break;
471 }
472out_key:
473 kfree(param.key);
474out_f:
475 fdput(f);
476 return ret;
477}
diff --git a/fs/internal.h b/fs/internal.h
index 17a8ae967493..0010889f2e85 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,8 +55,11 @@ extern void __init chrdev_init(void);
55/* 55/*
56 * fs_context.c 56 * fs_context.c
57 */ 57 */
58extern const struct fs_context_operations legacy_fs_context_ops;
58extern int parse_monolithic_mount_data(struct fs_context *, void *); 59extern int parse_monolithic_mount_data(struct fs_context *, void *);
59extern void fc_drop_locked(struct fs_context *); 60extern void fc_drop_locked(struct fs_context *);
61extern void vfs_clean_context(struct fs_context *fc);
62extern int finish_clean_context(struct fs_context *fc);
60 63
61/* 64/*
62 * namei.c 65 * namei.c
@@ -92,6 +95,7 @@ extern void __init mnt_init(void);
92extern int __mnt_want_write_file(struct file *); 95extern int __mnt_want_write_file(struct file *);
93extern void __mnt_drop_write_file(struct file *); 96extern void __mnt_drop_write_file(struct file *);
94 97
98extern void dissolve_on_fput(struct vfsmount *);
95/* 99/*
96 * fs_struct.c 100 * fs_struct.c
97 */ 101 */
diff --git a/fs/namespace.c b/fs/namespace.c
index c9cab307fa77..3357c3d65475 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> /* init_rootfs */ 20#include <linux/init.h> /* init_rootfs */
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
23#include <linux/file.h>
23#include <linux/uaccess.h> 24#include <linux/uaccess.h>
24#include <linux/proc_ns.h> 25#include <linux/proc_ns.h>
25#include <linux/magic.h> 26#include <linux/magic.h>
@@ -1832,6 +1833,27 @@ struct vfsmount *collect_mounts(const struct path *path)
1832 return &tree->mnt; 1833 return &tree->mnt;
1833} 1834}
1834 1835
1836static void free_mnt_ns(struct mnt_namespace *);
1837static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1838
1839void dissolve_on_fput(struct vfsmount *mnt)
1840{
1841 struct mnt_namespace *ns;
1842 namespace_lock();
1843 lock_mount_hash();
1844 ns = real_mount(mnt)->mnt_ns;
1845 if (ns) {
1846 if (is_anon_ns(ns))
1847 umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
1848 else
1849 ns = NULL;
1850 }
1851 unlock_mount_hash();
1852 namespace_unlock();
1853 if (ns)
1854 free_mnt_ns(ns);
1855}
1856
1835void drop_collected_mounts(struct vfsmount *mnt) 1857void drop_collected_mounts(struct vfsmount *mnt)
1836{ 1858{
1837 namespace_lock(); 1859 namespace_lock();
@@ -2065,6 +2087,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
2065 attach_mnt(source_mnt, dest_mnt, dest_mp); 2087 attach_mnt(source_mnt, dest_mnt, dest_mp);
2066 touch_mnt_namespace(source_mnt->mnt_ns); 2088 touch_mnt_namespace(source_mnt->mnt_ns);
2067 } else { 2089 } else {
2090 if (source_mnt->mnt_ns) {
2091 /* move from anon - the caller will destroy */
2092 list_del_init(&source_mnt->mnt_ns->list);
2093 }
2068 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); 2094 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
2069 commit_tree(source_mnt); 2095 commit_tree(source_mnt);
2070 } 2096 }
@@ -2222,6 +2248,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2222 return false; 2248 return false;
2223} 2249}
2224 2250
2251static struct mount *__do_loopback(struct path *old_path, int recurse)
2252{
2253 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2254
2255 if (IS_MNT_UNBINDABLE(old))
2256 return mnt;
2257
2258 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2259 return mnt;
2260
2261 if (!recurse && has_locked_children(old, old_path->dentry))
2262 return mnt;
2263
2264 if (recurse)
2265 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2266 else
2267 mnt = clone_mnt(old, old_path->dentry, 0);
2268
2269 if (!IS_ERR(mnt))
2270 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2271
2272 return mnt;
2273}
2274
2225/* 2275/*
2226 * do loopback mount. 2276 * do loopback mount.
2227 */ 2277 */
@@ -2229,7 +2279,7 @@ static int do_loopback(struct path *path, const char *old_name,
2229 int recurse) 2279 int recurse)
2230{ 2280{
2231 struct path old_path; 2281 struct path old_path;
2232 struct mount *mnt = NULL, *old, *parent; 2282 struct mount *mnt = NULL, *parent;
2233 struct mountpoint *mp; 2283 struct mountpoint *mp;
2234 int err; 2284 int err;
2235 if (!old_name || !*old_name) 2285 if (!old_name || !*old_name)
@@ -2243,38 +2293,21 @@ static int do_loopback(struct path *path, const char *old_name,
2243 goto out; 2293 goto out;
2244 2294
2245 mp = lock_mount(path); 2295 mp = lock_mount(path);
2246 err = PTR_ERR(mp); 2296 if (IS_ERR(mp)) {
2247 if (IS_ERR(mp)) 2297 err = PTR_ERR(mp);
2248 goto out; 2298 goto out;
2299 }
2249 2300
2250 old = real_mount(old_path.mnt);
2251 parent = real_mount(path->mnt); 2301 parent = real_mount(path->mnt);
2252
2253 err = -EINVAL;
2254 if (IS_MNT_UNBINDABLE(old))
2255 goto out2;
2256
2257 if (!check_mnt(parent)) 2302 if (!check_mnt(parent))
2258 goto out2; 2303 goto out2;
2259 2304
2260 if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) 2305 mnt = __do_loopback(&old_path, recurse);
2261 goto out2;
2262
2263 if (!recurse && has_locked_children(old, old_path.dentry))
2264 goto out2;
2265
2266 if (recurse)
2267 mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
2268 else
2269 mnt = clone_mnt(old, old_path.dentry, 0);
2270
2271 if (IS_ERR(mnt)) { 2306 if (IS_ERR(mnt)) {
2272 err = PTR_ERR(mnt); 2307 err = PTR_ERR(mnt);
2273 goto out2; 2308 goto out2;
2274 } 2309 }
2275 2310
2276 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2277
2278 err = graft_tree(mnt, parent, mp); 2311 err = graft_tree(mnt, parent, mp);
2279 if (err) { 2312 if (err) {
2280 lock_mount_hash(); 2313 lock_mount_hash();
@@ -2288,6 +2321,96 @@ out:
2288 return err; 2321 return err;
2289} 2322}
2290 2323
2324static struct file *open_detached_copy(struct path *path, bool recursive)
2325{
2326 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2327 struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
2328 struct mount *mnt, *p;
2329 struct file *file;
2330
2331 if (IS_ERR(ns))
2332 return ERR_CAST(ns);
2333
2334 namespace_lock();
2335 mnt = __do_loopback(path, recursive);
2336 if (IS_ERR(mnt)) {
2337 namespace_unlock();
2338 free_mnt_ns(ns);
2339 return ERR_CAST(mnt);
2340 }
2341
2342 lock_mount_hash();
2343 for (p = mnt; p; p = next_mnt(p, mnt)) {
2344 p->mnt_ns = ns;
2345 ns->mounts++;
2346 }
2347 ns->root = mnt;
2348 list_add_tail(&ns->list, &mnt->mnt_list);
2349 mntget(&mnt->mnt);
2350 unlock_mount_hash();
2351 namespace_unlock();
2352
2353 mntput(path->mnt);
2354 path->mnt = &mnt->mnt;
2355 file = dentry_open(path, O_PATH, current_cred());
2356 if (IS_ERR(file))
2357 dissolve_on_fput(path->mnt);
2358 else
2359 file->f_mode |= FMODE_NEED_UNMOUNT;
2360 return file;
2361}
2362
2363SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
2364{
2365 struct file *file;
2366 struct path path;
2367 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2368 bool detached = flags & OPEN_TREE_CLONE;
2369 int error;
2370 int fd;
2371
2372 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2373
2374 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2375 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2376 OPEN_TREE_CLOEXEC))
2377 return -EINVAL;
2378
2379 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2380 return -EINVAL;
2381
2382 if (flags & AT_NO_AUTOMOUNT)
2383 lookup_flags &= ~LOOKUP_AUTOMOUNT;
2384 if (flags & AT_SYMLINK_NOFOLLOW)
2385 lookup_flags &= ~LOOKUP_FOLLOW;
2386 if (flags & AT_EMPTY_PATH)
2387 lookup_flags |= LOOKUP_EMPTY;
2388
2389 if (detached && !may_mount())
2390 return -EPERM;
2391
2392 fd = get_unused_fd_flags(flags & O_CLOEXEC);
2393 if (fd < 0)
2394 return fd;
2395
2396 error = user_path_at(dfd, filename, lookup_flags, &path);
2397 if (unlikely(error)) {
2398 file = ERR_PTR(error);
2399 } else {
2400 if (detached)
2401 file = open_detached_copy(&path, flags & AT_RECURSIVE);
2402 else
2403 file = dentry_open(&path, O_PATH, current_cred());
2404 path_put(&path);
2405 }
2406 if (IS_ERR(file)) {
2407 put_unused_fd(fd);
2408 return PTR_ERR(file);
2409 }
2410 fd_install(fd, file);
2411 return fd;
2412}
2413
2291/* 2414/*
2292 * Don't allow locked mount flags to be cleared. 2415 * Don't allow locked mount flags to be cleared.
2293 * 2416 *
@@ -2426,72 +2549,117 @@ static inline int tree_contains_unbindable(struct mount *mnt)
2426 return 0; 2549 return 0;
2427} 2550}
2428 2551
2429static int do_move_mount(struct path *path, const char *old_name) 2552/*
2553 * Check that there aren't references to earlier/same mount namespaces in the
2554 * specified subtree. Such references can act as pins for mount namespaces
2555 * that aren't checked by the mount-cycle checking code, thereby allowing
2556 * cycles to be made.
2557 */
2558static bool check_for_nsfs_mounts(struct mount *subtree)
2430{ 2559{
2431 struct path old_path, parent_path; 2560 struct mount *p;
2561 bool ret = false;
2562
2563 lock_mount_hash();
2564 for (p = subtree; p; p = next_mnt(p, subtree))
2565 if (mnt_ns_loop(p->mnt.mnt_root))
2566 goto out;
2567
2568 ret = true;
2569out:
2570 unlock_mount_hash();
2571 return ret;
2572}
2573
2574static int do_move_mount(struct path *old_path, struct path *new_path)
2575{
2576 struct path parent_path = {.mnt = NULL, .dentry = NULL};
2577 struct mnt_namespace *ns;
2432 struct mount *p; 2578 struct mount *p;
2433 struct mount *old; 2579 struct mount *old;
2434 struct mountpoint *mp; 2580 struct mountpoint *mp;
2435 int err; 2581 int err;
2436 if (!old_name || !*old_name) 2582 bool attached;
2437 return -EINVAL;
2438 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2439 if (err)
2440 return err;
2441 2583
2442 mp = lock_mount(path); 2584 mp = lock_mount(new_path);
2443 err = PTR_ERR(mp);
2444 if (IS_ERR(mp)) 2585 if (IS_ERR(mp))
2445 goto out; 2586 return PTR_ERR(mp);
2446 2587
2447 old = real_mount(old_path.mnt); 2588 old = real_mount(old_path->mnt);
2448 p = real_mount(path->mnt); 2589 p = real_mount(new_path->mnt);
2590 attached = mnt_has_parent(old);
2591 ns = old->mnt_ns;
2449 2592
2450 err = -EINVAL; 2593 err = -EINVAL;
2451 if (!check_mnt(p) || !check_mnt(old)) 2594 /* The mountpoint must be in our namespace. */
2452 goto out1; 2595 if (!check_mnt(p))
2596 goto out;
2453 2597
2454 if (old->mnt.mnt_flags & MNT_LOCKED) 2598 /* The thing moved should be either ours or completely unattached. */
2455 goto out1; 2599 if (attached && !check_mnt(old))
2600 goto out;
2456 2601
2457 err = -EINVAL; 2602 if (!attached && !is_anon_ns(ns))
2458 if (old_path.dentry != old_path.mnt->mnt_root) 2603 goto out;
2459 goto out1;
2460 2604
2461 if (!mnt_has_parent(old)) 2605 if (old->mnt.mnt_flags & MNT_LOCKED)
2462 goto out1; 2606 goto out;
2463 2607
2464 if (d_is_dir(path->dentry) != 2608 if (old_path->dentry != old_path->mnt->mnt_root)
2465 d_is_dir(old_path.dentry)) 2609 goto out;
2466 goto out1; 2610
2611 if (d_is_dir(new_path->dentry) !=
2612 d_is_dir(old_path->dentry))
2613 goto out;
2467 /* 2614 /*
2468 * Don't move a mount residing in a shared parent. 2615 * Don't move a mount residing in a shared parent.
2469 */ 2616 */
2470 if (IS_MNT_SHARED(old->mnt_parent)) 2617 if (attached && IS_MNT_SHARED(old->mnt_parent))
2471 goto out1; 2618 goto out;
2472 /* 2619 /*
2473 * Don't move a mount tree containing unbindable mounts to a destination 2620 * Don't move a mount tree containing unbindable mounts to a destination
2474 * mount which is shared. 2621 * mount which is shared.
2475 */ 2622 */
2476 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) 2623 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2477 goto out1; 2624 goto out;
2478 err = -ELOOP; 2625 err = -ELOOP;
2626 if (!check_for_nsfs_mounts(old))
2627 goto out;
2479 for (; mnt_has_parent(p); p = p->mnt_parent) 2628 for (; mnt_has_parent(p); p = p->mnt_parent)
2480 if (p == old) 2629 if (p == old)
2481 goto out1; 2630 goto out;
2482 2631
2483 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); 2632 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2633 attached ? &parent_path : NULL);
2484 if (err) 2634 if (err)
2485 goto out1; 2635 goto out;
2486 2636
2487 /* if the mount is moved, it should no longer be expire 2637 /* if the mount is moved, it should no longer be expire
2488 * automatically */ 2638 * automatically */
2489 list_del_init(&old->mnt_expire); 2639 list_del_init(&old->mnt_expire);
2490out1:
2491 unlock_mount(mp);
2492out: 2640out:
2493 if (!err) 2641 unlock_mount(mp);
2642 if (!err) {
2494 path_put(&parent_path); 2643 path_put(&parent_path);
2644 if (!attached)
2645 free_mnt_ns(ns);
2646 }
2647 return err;
2648}
2649
2650static int do_move_mount_old(struct path *path, const char *old_name)
2651{
2652 struct path old_path;
2653 int err;
2654
2655 if (!old_name || !*old_name)
2656 return -EINVAL;
2657
2658 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2659 if (err)
2660 return err;
2661
2662 err = do_move_mount(&old_path, path);
2495 path_put(&old_path); 2663 path_put(&old_path);
2496 return err; 2664 return err;
2497} 2665}
@@ -2937,7 +3105,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
2937 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 3105 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2938 retval = do_change_type(&path, flags); 3106 retval = do_change_type(&path, flags);
2939 else if (flags & MS_MOVE) 3107 else if (flags & MS_MOVE)
2940 retval = do_move_mount(&path, dev_name); 3108 retval = do_move_mount_old(&path, dev_name);
2941 else 3109 else
2942 retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, 3110 retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
2943 dev_name, data_page); 3111 dev_name, data_page);
@@ -3166,6 +3334,203 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3166} 3334}
3167 3335
3168/* 3336/*
3337 * Create a kernel mount representation for a new, prepared superblock
3338 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
3339 */
3340SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3341 unsigned int, attr_flags)
3342{
3343 struct mnt_namespace *ns;
3344 struct fs_context *fc;
3345 struct file *file;
3346 struct path newmount;
3347 struct mount *mnt;
3348 struct fd f;
3349 unsigned int mnt_flags = 0;
3350 long ret;
3351
3352 if (!may_mount())
3353 return -EPERM;
3354
3355 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3356 return -EINVAL;
3357
3358 if (attr_flags & ~(MOUNT_ATTR_RDONLY |
3359 MOUNT_ATTR_NOSUID |
3360 MOUNT_ATTR_NODEV |
3361 MOUNT_ATTR_NOEXEC |
3362 MOUNT_ATTR__ATIME |
3363 MOUNT_ATTR_NODIRATIME))
3364 return -EINVAL;
3365
3366 if (attr_flags & MOUNT_ATTR_RDONLY)
3367 mnt_flags |= MNT_READONLY;
3368 if (attr_flags & MOUNT_ATTR_NOSUID)
3369 mnt_flags |= MNT_NOSUID;
3370 if (attr_flags & MOUNT_ATTR_NODEV)
3371 mnt_flags |= MNT_NODEV;
3372 if (attr_flags & MOUNT_ATTR_NOEXEC)
3373 mnt_flags |= MNT_NOEXEC;
3374 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3375 mnt_flags |= MNT_NODIRATIME;
3376
3377 switch (attr_flags & MOUNT_ATTR__ATIME) {
3378 case MOUNT_ATTR_STRICTATIME:
3379 break;
3380 case MOUNT_ATTR_NOATIME:
3381 mnt_flags |= MNT_NOATIME;
3382 break;
3383 case MOUNT_ATTR_RELATIME:
3384 mnt_flags |= MNT_RELATIME;
3385 break;
3386 default:
3387 return -EINVAL;
3388 }
3389
3390 f = fdget(fs_fd);
3391 if (!f.file)
3392 return -EBADF;
3393
3394 ret = -EINVAL;
3395 if (f.file->f_op != &fscontext_fops)
3396 goto err_fsfd;
3397
3398 fc = f.file->private_data;
3399
3400 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3401 if (ret < 0)
3402 goto err_fsfd;
3403
3404 /* There must be a valid superblock or we can't mount it */
3405 ret = -EINVAL;
3406 if (!fc->root)
3407 goto err_unlock;
3408
3409 ret = -EPERM;
3410 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3411 pr_warn("VFS: Mount too revealing\n");
3412 goto err_unlock;
3413 }
3414
3415 ret = -EBUSY;
3416 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3417 goto err_unlock;
3418
3419 ret = -EPERM;
3420 if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
3421 goto err_unlock;
3422
3423 newmount.mnt = vfs_create_mount(fc);
3424 if (IS_ERR(newmount.mnt)) {
3425 ret = PTR_ERR(newmount.mnt);
3426 goto err_unlock;
3427 }
3428 newmount.dentry = dget(fc->root);
3429 newmount.mnt->mnt_flags = mnt_flags;
3430
3431 /* We've done the mount bit - now move the file context into more or
3432 * less the same state as if we'd done an fspick(). We don't want to
3433 * do any memory allocation or anything like that at this point as we
3434 * don't want to have to handle any errors incurred.
3435 */
3436 vfs_clean_context(fc);
3437
3438 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3439 if (IS_ERR(ns)) {
3440 ret = PTR_ERR(ns);
3441 goto err_path;
3442 }
3443 mnt = real_mount(newmount.mnt);
3444 mnt->mnt_ns = ns;
3445 ns->root = mnt;
3446 ns->mounts = 1;
3447 list_add(&mnt->mnt_list, &ns->list);
3448
3449 /* Attach to an apparent O_PATH fd with a note that we need to unmount
3450 * it, not just simply put it.
3451 */
3452 file = dentry_open(&newmount, O_PATH, fc->cred);
3453 if (IS_ERR(file)) {
3454 dissolve_on_fput(newmount.mnt);
3455 ret = PTR_ERR(file);
3456 goto err_path;
3457 }
3458 file->f_mode |= FMODE_NEED_UNMOUNT;
3459
3460 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3461 if (ret >= 0)
3462 fd_install(ret, file);
3463 else
3464 fput(file);
3465
3466err_path:
3467 path_put(&newmount);
3468err_unlock:
3469 mutex_unlock(&fc->uapi_mutex);
3470err_fsfd:
3471 fdput(f);
3472 return ret;
3473}
3474
3475/*
3476 * Move a mount from one place to another. In combination with
3477 * fsopen()/fsmount() this is used to install a new mount and in combination
3478 * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
3479 * a mount subtree.
3480 *
3481 * Note the flags value is a combination of MOVE_MOUNT_* flags.
3482 */
3483SYSCALL_DEFINE5(move_mount,
3484 int, from_dfd, const char *, from_pathname,
3485 int, to_dfd, const char *, to_pathname,
3486 unsigned int, flags)
3487{
3488 struct path from_path, to_path;
3489 unsigned int lflags;
3490 int ret = 0;
3491
3492 if (!may_mount())
3493 return -EPERM;
3494
3495 if (flags & ~MOVE_MOUNT__MASK)
3496 return -EINVAL;
3497
3498 /* If someone gives a pathname, they aren't permitted to move
3499 * from an fd that requires unmount as we can't get at the flag
3500 * to clear it afterwards.
3501 */
3502 lflags = 0;
3503 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3504 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3505 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3506
3507 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3508 if (ret < 0)
3509 return ret;
3510
3511 lflags = 0;
3512 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3513 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3514 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3515
3516 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3517 if (ret < 0)
3518 goto out_from;
3519
3520 ret = security_move_mount(&from_path, &to_path);
3521 if (ret < 0)
3522 goto out_to;
3523
3524 ret = do_move_mount(&from_path, &to_path);
3525
3526out_to:
3527 path_put(&to_path);
3528out_from:
3529 path_put(&from_path);
3530 return ret;
3531}
3532
3533/*
3169 * Return true if path is reachable from root 3534 * Return true if path is reachable from root
3170 * 3535 *
3171 * namespace_sem or mount_lock is held 3536 * namespace_sem or mount_lock is held
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5174405e40d5..ec07f4c5630d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,10 +165,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
165#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 165#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
166 166
167/* File is capable of returning -EAGAIN if I/O will block */ 167/* File is capable of returning -EAGAIN if I/O will block */
168#define FMODE_NOWAIT ((__force fmode_t)0x8000000) 168#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
169
170/* File represents mount that needs unmounting */
171#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000)
169 172
170/* File does not contribute to nr_files count */ 173/* File does not contribute to nr_files count */
171#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) 174#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
172 175
173/* 176/*
174 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 177 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index eaca452088fa..1f966670c8dc 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -13,8 +13,10 @@
13#define _LINUX_FS_CONTEXT_H 13#define _LINUX_FS_CONTEXT_H
14 14
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/refcount.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/security.h> 18#include <linux/security.h>
19#include <linux/mutex.h>
18 20
19struct cred; 21struct cred;
20struct dentry; 22struct dentry;
@@ -35,6 +37,19 @@ enum fs_context_purpose {
35}; 37};
36 38
37/* 39/*
40 * Userspace usage phase for fsopen/fspick.
41 */
enum fs_context_phase {
	/* Loading params for sb creation */
	FS_CONTEXT_CREATE_PARAMS	= 0,
	/* A superblock is being created */
	FS_CONTEXT_CREATING		= 1,
	/* Superblock created, awaiting fsmount() */
	FS_CONTEXT_AWAITING_MOUNT	= 2,
	/* Awaiting initialisation for reconfiguration */
	FS_CONTEXT_AWAITING_RECONF	= 3,
	/* Loading params for reconfiguration */
	FS_CONTEXT_RECONF_PARAMS	= 4,
	/* Reconfiguring the superblock */
	FS_CONTEXT_RECONFIGURING	= 5,
	/* Failed to correctly transition a context */
	FS_CONTEXT_FAILED		= 6,
};
51
52/*
38 * Type of parameter value. 53 * Type of parameter value.
39 */ 54 */
40enum fs_value_type { 55enum fs_value_type {
@@ -74,12 +89,14 @@ struct fs_parameter {
74 */ 89 */
75struct fs_context { 90struct fs_context {
76 const struct fs_context_operations *ops; 91 const struct fs_context_operations *ops;
92 struct mutex uapi_mutex; /* Userspace access mutex */
77 struct file_system_type *fs_type; 93 struct file_system_type *fs_type;
78 void *fs_private; /* The filesystem's context */ 94 void *fs_private; /* The filesystem's context */
79 struct dentry *root; /* The root and superblock */ 95 struct dentry *root; /* The root and superblock */
80 struct user_namespace *user_ns; /* The user namespace for this mount */ 96 struct user_namespace *user_ns; /* The user namespace for this mount */
81 struct net *net_ns; /* The network namespace for this mount */ 97 struct net *net_ns; /* The network namespace for this mount */
82 const struct cred *cred; /* The mounter's credentials */ 98 const struct cred *cred; /* The mounter's credentials */
99 struct fc_log *log; /* Logging buffer */
83 const char *source; /* The source name (eg. dev path) */ 100 const char *source; /* The source name (eg. dev path) */
84 const char *subtype; /* The subtype to set on the superblock */ 101 const char *subtype; /* The subtype to set on the superblock */
85 void *security; /* Linux S&M options */ 102 void *security; /* Linux S&M options */
@@ -88,6 +105,7 @@ struct fs_context {
88 unsigned int sb_flags_mask; /* Superblock flags that were changed */ 105 unsigned int sb_flags_mask; /* Superblock flags that were changed */
89 unsigned int lsm_flags; /* Information flags from the fs to the LSM */ 106 unsigned int lsm_flags; /* Information flags from the fs to the LSM */
90 enum fs_context_purpose purpose:8; 107 enum fs_context_purpose purpose:8;
108 enum fs_context_phase phase:8; /* The phase the context is in */
91 bool need_free:1; /* Need to call ops->free() */ 109 bool need_free:1; /* Need to call ops->free() */
92 bool global:1; /* Goes into &init_user_ns */ 110 bool global:1; /* Goes into &init_user_ns */
93}; 111};
@@ -135,15 +153,21 @@ extern int vfs_get_super(struct fs_context *fc,
135 153
136extern const struct file_operations fscontext_fops; 154extern const struct file_operations fscontext_fops;
137 155
138#ifdef CONFIG_PRINTK 156/*
157 * Mount error, warning and informational message logging. This structure is
158 * shareable between a mount and a subordinate mount.
159 */
160struct fc_log {
161 refcount_t usage;
162 u8 head; /* Insertion index in buffer[] */
163 u8 tail; /* Removal index in buffer[] */
164 u8 need_free; /* Mask of kfree'able items in buffer[] */
165 struct module *owner; /* Owner module for strings that don't then need freeing */
166 char *buffer[8];
167};
168
139extern __attribute__((format(printf, 2, 3))) 169extern __attribute__((format(printf, 2, 3)))
140void logfc(struct fs_context *fc, const char *fmt, ...); 170void logfc(struct fs_context *fc, const char *fmt, ...);
141#else
142static inline __attribute__((format(printf, 2, 3)))
143void logfc(struct fs_context *fc, const char *fmt, ...)
144{
145}
146#endif
147 171
148/** 172/**
149 * infof - Store supplementary informational message 173 * infof - Store supplementary informational message
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index f7e55d0d2672..47f58cfb6a19 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -159,6 +159,10 @@
159 * Parse a string of security data filling in the opts structure 159 * Parse a string of security data filling in the opts structure
160 * @options string containing all mount options known by the LSM 160 * @options string containing all mount options known by the LSM
161 * @opts binary data structure usable by the LSM 161 * @opts binary data structure usable by the LSM
162 * @move_mount:
163 * Check permission before a mount is moved.
164 * @from_path indicates the mount that is going to be moved.
165 * @to_path indicates the mountpoint that will be mounted upon.
162 * @dentry_init_security: 166 * @dentry_init_security:
163 * Compute a context for a dentry as the inode is not yet available 167 * Compute a context for a dentry as the inode is not yet available
164 * since NFSv4 has no label backed by an EA anyway. 168 * since NFSv4 has no label backed by an EA anyway.
@@ -1502,6 +1506,7 @@ union security_list_options {
1502 unsigned long *set_kern_flags); 1506 unsigned long *set_kern_flags);
1503 int (*sb_add_mnt_opt)(const char *option, const char *val, int len, 1507 int (*sb_add_mnt_opt)(const char *option, const char *val, int len,
1504 void **mnt_opts); 1508 void **mnt_opts);
1509 int (*move_mount)(const struct path *from_path, const struct path *to_path);
1505 int (*dentry_init_security)(struct dentry *dentry, int mode, 1510 int (*dentry_init_security)(struct dentry *dentry, int mode,
1506 const struct qstr *name, void **ctx, 1511 const struct qstr *name, void **ctx,
1507 u32 *ctxlen); 1512 u32 *ctxlen);
@@ -1839,6 +1844,7 @@ struct security_hook_heads {
1839 struct hlist_head sb_set_mnt_opts; 1844 struct hlist_head sb_set_mnt_opts;
1840 struct hlist_head sb_clone_mnt_opts; 1845 struct hlist_head sb_clone_mnt_opts;
1841 struct hlist_head sb_add_mnt_opt; 1846 struct hlist_head sb_add_mnt_opt;
1847 struct hlist_head move_mount;
1842 struct hlist_head dentry_init_security; 1848 struct hlist_head dentry_init_security;
1843 struct hlist_head dentry_create_files_as; 1849 struct hlist_head dentry_create_files_as;
1844#ifdef CONFIG_SECURITY_PATH 1850#ifdef CONFIG_SECURITY_PATH
diff --git a/include/linux/module.h b/include/linux/module.h
index 5bf5dcd91009..7dc4dc79b634 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -709,6 +709,12 @@ static inline bool is_module_text_address(unsigned long addr)
709 return false; 709 return false;
710} 710}
711 711
/*
 * Stub: reports that @addr never lies within the core area of @mod.
 * NOTE(review): presumably the variant used when module support is
 * compiled out - confirm against the enclosing #ifdef.
 */
static inline bool within_module_core(unsigned long addr,
				      const struct module *mod)
{
	return false;
}
717
712/* Get/put a kernel symbol (calls should be symmetric) */ 718/* Get/put a kernel symbol (calls should be symmetric) */
713#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) 719#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
714#define symbol_put(x) do { } while (0) 720#define symbol_put(x) do { } while (0)
diff --git a/include/linux/security.h b/include/linux/security.h
index d543293216b9..659071c2e57c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -251,6 +251,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb,
251 unsigned long *set_kern_flags); 251 unsigned long *set_kern_flags);
252int security_add_mnt_opt(const char *option, const char *val, 252int security_add_mnt_opt(const char *option, const char *val,
253 int len, void **mnt_opts); 253 int len, void **mnt_opts);
254int security_move_mount(const struct path *from_path, const struct path *to_path);
254int security_dentry_init_security(struct dentry *dentry, int mode, 255int security_dentry_init_security(struct dentry *dentry, int mode,
255 const struct qstr *name, void **ctx, 256 const struct qstr *name, void **ctx,
256 u32 *ctxlen); 257 u32 *ctxlen);
@@ -614,6 +615,12 @@ static inline int security_add_mnt_opt(const char *option, const char *val,
614 return 0; 615 return 0;
615} 616}
616 617
/*
 * Stub: unconditionally permits moving the mount at @from_path onto
 * @to_path by returning 0.
 * NOTE(review): presumably the variant used when LSM support is compiled
 * out - confirm against the enclosing #ifdef.
 */
static inline int security_move_mount(const struct path *from_path,
				      const struct path *to_path)
{
	return 0;
}
623
617static inline int security_inode_alloc(struct inode *inode) 624static inline int security_inode_alloc(struct inode *inode)
618{ 625{
619 return 0; 626 return 0;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e446806a561f..e2870fe1be5b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -985,6 +985,15 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
985 unsigned mask, struct statx __user *buffer); 985 unsigned mask, struct statx __user *buffer);
986asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, 986asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
987 int flags, uint32_t sig); 987 int flags, uint32_t sig);
988asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
989asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
990 int to_dfd, const char __user *to_path,
991 unsigned int ms_flags);
992asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags);
993asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key,
994 const void __user *value, int aux);
995asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags);
996asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags);
988asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, 997asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
989 siginfo_t __user *info, 998 siginfo_t __user *info,
990 unsigned int flags); 999 unsigned int flags);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index a2f8658f1c55..1d338357df8a 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -91,5 +91,7 @@
91#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ 91#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
92#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ 92#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
93 93
94#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
95
94 96
95#endif /* _UAPI_LINUX_FCNTL_H */ 97#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 3f9ec42510b0..96a0240f23fe 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -55,4 +55,66 @@
55#define MS_MGC_VAL 0xC0ED0000 55#define MS_MGC_VAL 0xC0ED0000
56#define MS_MGC_MSK 0xffff0000 56#define MS_MGC_MSK 0xffff0000
57 57
58/*
59 * open_tree() flags.
60 */
61#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
62#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
63
64/*
65 * move_mount() flags.
66 */
67#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
68#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
69#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
70#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
71#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
72#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
73#define MOVE_MOUNT__MASK 0x00000077
74
75/*
76 * fsopen() flags.
77 */
78#define FSOPEN_CLOEXEC 0x00000001
79
80/*
81 * fspick() flags.
82 */
83#define FSPICK_CLOEXEC 0x00000001
84#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
85#define FSPICK_NO_AUTOMOUNT 0x00000004
86#define FSPICK_EMPTY_PATH 0x00000008
87
88/*
89 * The type of fsconfig() call made.
90 */
enum fsconfig_command {
	/* Set parameter, supplying no value */
	FSCONFIG_SET_FLAG		= 0,
	/* Set parameter, supplying a string value */
	FSCONFIG_SET_STRING		= 1,
	/* Set parameter, supplying a binary blob value */
	FSCONFIG_SET_BINARY		= 2,
	/* Set parameter, supplying an object by path */
	FSCONFIG_SET_PATH		= 3,
	/* Set parameter, supplying an object by (empty) path */
	FSCONFIG_SET_PATH_EMPTY		= 4,
	/* Set parameter, supplying an object by fd */
	FSCONFIG_SET_FD			= 5,
	/* Invoke superblock creation */
	FSCONFIG_CMD_CREATE		= 6,
	/* Invoke superblock reconfiguration */
	FSCONFIG_CMD_RECONFIGURE	= 7,
};
101
102/*
103 * fsmount() flags.
104 */
105#define FSMOUNT_CLOEXEC 0x00000001
106
107/*
108 * Mount attributes.
109 */
110#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
111#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
112#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
113#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
114#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
115#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
116#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
117#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
118#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
119
58#endif /* _UAPI_LINUX_MOUNT_H */ 120#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/samples/Kconfig b/samples/Kconfig
index d19754ccad08..30a89425009c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -154,10 +154,11 @@ config SAMPLE_ANDROID_BINDERFS
154 Builds a sample program to illustrate the use of the Android binderfs 154 Builds a sample program to illustrate the use of the Android binderfs
155 filesystem. 155 filesystem.
156 156
157config SAMPLE_STATX 157config SAMPLE_VFS
158 bool "Build example extended-stat using code" 158 bool "Build example programs that use new VFS system calls"
159 depends on BROKEN
160 help 159 help
161 Build example userspace program to use the new extended-stat syscall. 160 Build example userspace programs that use new VFS system calls such
161 as mount API and statx(). Note that this is restricted to the x86
162 arch whilst it accesses system calls that aren't yet in all arches.
162 163
163endif # SAMPLES 164endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index fadadb1c3b05..2484cc262d3e 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,4 +3,4 @@
3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ 3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
4 hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ 4 hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
5 configfs/ connector/ v4l/ trace_printk/ \ 5 configfs/ connector/ v4l/ trace_printk/ \
6 vfio-mdev/ statx/ qmi/ binderfs/ pidfd/ 6 vfio-mdev/ vfs/ qmi/ binderfs/ pidfd/
diff --git a/samples/statx/Makefile b/samples/vfs/Makefile
index 59df7c25a9d1..4ac9690fb3c4 100644
--- a/samples/statx/Makefile
+++ b/samples/vfs/Makefile
@@ -1,7 +1,10 @@
1# List of programs to build 1# List of programs to build
2hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx 2hostprogs-$(CONFIG_SAMPLE_VFS) := \
3 test-fsmount \
4 test-statx
3 5
4# Tell kbuild to always build the programs 6# Tell kbuild to always build the programs
5always := $(hostprogs-y) 7always := $(hostprogs-y)
6 8
9HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
7HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include 10HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
diff --git a/samples/vfs/test-fsmount.c b/samples/vfs/test-fsmount.c
new file mode 100644
index 000000000000..266d72b3dce4
--- /dev/null
+++ b/samples/vfs/test-fsmount.c
@@ -0,0 +1,133 @@
1/* fd-based mount test.
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <stdio.h>
13#include <stdlib.h>
14#include <unistd.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <sys/prctl.h>
18#include <sys/wait.h>
19#include <linux/mount.h>
20#include <linux/unistd.h>
21
/*
 * Evaluate expression x exactly once.  If it yields -1, print the
 * stringified expression together with the errno text via perror() and
 * terminate the program with exit status 1.
 */
#define E(x)					\
	do {					\
		if ((x) == -1) {		\
			perror(#x);		\
			exit(1);		\
		}				\
	} while (0)
23
/*
 * Drain the messages that can be read from @fd and echo them to stderr.
 *
 * Every successful read() is treated as one message: the first byte is a
 * class tag ('e' = error, 'w' = warning, 'i' = info), the second byte is
 * skipped, and the remaining bytes are the message text.  Messages with
 * any other tag are silently discarded.
 *
 * errno is saved on entry and restored on return so that the caller can
 * still report the failure that led it here.
 */
static void check_messages(int fd)
{
	char buf[4096];
	int saved_errno = errno;

	for (;;) {
		ssize_t n = read(fd, buf, sizeof(buf));
		int len;

		/*
		 * Stop on error *and* on a zero-length read: treating EOF as
		 * a message would make this loop spin forever.
		 */
		if (n <= 0)
			break;

		/*
		 * A message shorter than the two-byte header has no text.
		 * Skipping it avoids handing a negative precision to
		 * fprintf(), which would be ignored and let "%s" run off the
		 * end of the unterminated buffer.
		 */
		if (n < 2)
			continue;

		len = (int)(n - 2);

		switch (buf[0]) {
		case 'e':
			fprintf(stderr, "Error: %*.*s\n", len, len, buf + 2);
			break;
		case 'w':
			fprintf(stderr, "Warning: %*.*s\n", len, len, buf + 2);
			break;
		case 'i':
			fprintf(stderr, "Info: %*.*s\n", len, len, buf + 2);
			break;
		default:
			/* Unknown message class: ignore it. */
			break;
		}
	}

	errno = saved_errno;
}
52
/*
 * Report a failed mount step and terminate the program.
 *
 * First prints whatever messages check_messages() can read from @fd, then
 * prints @s followed by the current errno text (glibc "%m"), and finally
 * exits with status 1.  Never returns.
 */
static void __attribute__((noreturn)) mount_error(int fd, const char *s)
{
	check_messages(fd);

	fprintf(stderr, "%s: %m\n", s);

	exit(1);
}
60
/*
 * Fallback syscall numbers for headers that do not define the mount API
 * system calls yet.  -1 is used as a placeholder in the hope that it is
 * not a valid syscall number.
 */
#if !defined(__NR_fsopen)
#define __NR_fsopen -1
#endif

#if !defined(__NR_fsmount)
#define __NR_fsmount -1
#endif

#if !defined(__NR_fsconfig)
#define __NR_fsconfig -1
#endif

#if !defined(__NR_move_mount)
#define __NR_move_mount -1
#endif
74
75
/*
 * Thin wrapper that issues the fsopen system call through syscall(),
 * passing @fs_name and @flags through unchanged and returning the result
 * narrowed to int.
 */
static inline int fsopen(const char *fs_name, unsigned int flags)
{
	long ret;

	ret = syscall(__NR_fsopen, fs_name, flags);
	return ret;
}
80
/*
 * Thin wrapper that issues the fsmount system call through syscall(),
 * passing @fsfd, @flags and @ms_flags through unchanged and returning the
 * result narrowed to int.
 */
static inline int fsmount(int fsfd, unsigned int flags, unsigned int ms_flags)
{
	long ret;

	ret = syscall(__NR_fsmount, fsfd, flags, ms_flags);
	return ret;
}
85
/*
 * Thin wrapper that issues the fsconfig system call through syscall().
 * All five arguments (@fsfd, @cmd, @key, @val, @aux) are forwarded
 * unchanged; the result is returned narrowed to int.
 */
static inline int fsconfig(int fsfd, unsigned int cmd,
			   const char *key, const void *val, int aux)
{
	long ret;

	ret = syscall(__NR_fsconfig, fsfd, cmd, key, val, aux);
	return ret;
}
91
/*
 * Thin wrapper that issues the move_mount system call through syscall().
 * The source (@from_dfd, @from_pathname), the destination (@to_dfd,
 * @to_pathname) and @flags are forwarded unchanged; the result is
 * returned narrowed to int.
 */
static inline int move_mount(int from_dfd, const char *from_pathname,
			     int to_dfd, const char *to_pathname,
			     unsigned int flags)
{
	long ret;

	ret = syscall(__NR_move_mount, from_dfd, from_pathname,
		      to_dfd, to_pathname, flags);
	return ret;
}
100
/*
 * Call fsconfig() with the given arguments and, if it returns -1, bail
 * out through mount_error().  The error is labelled with @key, or with
 * "create" when @key is NULL.
 */
#define E_fsconfig(fd, cmd, key, val, aux)				\
	do {								\
		if (fsconfig(fd, cmd, key, val, aux) == -1) {		\
			mount_error(fd, key ?: "create");		\
		}							\
	} while (0)
106
107int main(int argc, char *argv[])
108{
109 int fsfd, mfd;
110
111 /* Mount a publically available AFS filesystem */
112 fsfd = fsopen("afs", 0);
113 if (fsfd == -1) {
114 perror("fsopen");
115 exit(1);
116 }
117
118 E_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell.", 0);
119 E_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
120
121 mfd = fsmount(fsfd, 0, MOUNT_ATTR_RDONLY);
122 if (mfd < 0)
123 mount_error(fsfd, "fsmount");
124 E(close(fsfd));
125
126 if (move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH) < 0) {
127 perror("move_mount");
128 exit(1);
129 }
130
131 E(close(mfd));
132 exit(0);
133}
diff --git a/samples/statx/test-statx.c b/samples/vfs/test-statx.c
index d4d77b09412c..e91f918e84c4 100644
--- a/samples/statx/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -25,13 +25,21 @@
25#include <sys/types.h> 25#include <sys/types.h>
26#include <linux/stat.h> 26#include <linux/stat.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#define statx foo
29#define statx_timestamp foo_timestamp
28#include <sys/stat.h> 30#include <sys/stat.h>
31#undef statx
32#undef statx_timestamp
29 33
30#define AT_STATX_SYNC_TYPE 0x6000 34#define AT_STATX_SYNC_TYPE 0x6000
31#define AT_STATX_SYNC_AS_STAT 0x0000 35#define AT_STATX_SYNC_AS_STAT 0x0000
32#define AT_STATX_FORCE_SYNC 0x2000 36#define AT_STATX_FORCE_SYNC 0x2000
33#define AT_STATX_DONT_SYNC 0x4000 37#define AT_STATX_DONT_SYNC 0x4000
34 38
39#ifndef __NR_statx
40#define __NR_statx -1
41#endif
42
35static __attribute__((unused)) 43static __attribute__((unused))
36ssize_t statx(int dfd, const char *filename, unsigned flags, 44ssize_t statx(int dfd, const char *filename, unsigned flags,
37 unsigned int mask, struct statx *buffer) 45 unsigned int mask, struct statx *buffer)
@@ -157,7 +165,8 @@ static void dump_statx(struct statx *stx)
157 "?dai?c??" /* 7- 0 0x00000000-000000ff */ 165 "?dai?c??" /* 7- 0 0x00000000-000000ff */
158 ; 166 ;
159 167
160 printf("Attributes: %016llx (", stx->stx_attributes); 168 printf("Attributes: %016llx (",
169 (unsigned long long)stx->stx_attributes);
161 for (byte = 64 - 8; byte >= 0; byte -= 8) { 170 for (byte = 64 - 8; byte >= 0; byte -= 8) {
162 bits = stx->stx_attributes >> byte; 171 bits = stx->stx_attributes >> byte;
163 mbits = stx->stx_attributes_mask >> byte; 172 mbits = stx->stx_attributes_mask >> byte;
diff --git a/security/security.c b/security/security.c
index 8d6ef9da94eb..613a5c00e602 100644
--- a/security/security.c
+++ b/security/security.c
@@ -866,6 +866,11 @@ int security_add_mnt_opt(const char *option, const char *val, int len,
866} 866}
867EXPORT_SYMBOL(security_add_mnt_opt); 867EXPORT_SYMBOL(security_add_mnt_opt);
868 868
869int security_move_mount(const struct path *from_path, const struct path *to_path)
870{
871 return call_int_hook(move_mount, 0, from_path, to_path);
872}
873
869int security_inode_alloc(struct inode *inode) 874int security_inode_alloc(struct inode *inode)
870{ 875{
871 int rc = lsm_inode_alloc(inode); 876 int rc = lsm_inode_alloc(inode);