diff options
| -rw-r--r-- | arch/x86/entry/syscalls/syscall_32.tbl | 7 | ||||
| -rw-r--r-- | arch/x86/entry/syscalls/syscall_64.tbl | 6 | ||||
| -rw-r--r-- | fs/Makefile | 2 | ||||
| -rw-r--r-- | fs/file_table.c | 9 | ||||
| -rw-r--r-- | fs/fs_context.c | 160 | ||||
| -rw-r--r-- | fs/fsopen.c | 477 | ||||
| -rw-r--r-- | fs/internal.h | 4 | ||||
| -rw-r--r-- | fs/namespace.c | 477 | ||||
| -rw-r--r-- | include/linux/fs.h | 7 | ||||
| -rw-r--r-- | include/linux/fs_context.h | 38 | ||||
| -rw-r--r-- | include/linux/lsm_hooks.h | 6 | ||||
| -rw-r--r-- | include/linux/module.h | 6 | ||||
| -rw-r--r-- | include/linux/security.h | 7 | ||||
| -rw-r--r-- | include/linux/syscalls.h | 9 | ||||
| -rw-r--r-- | include/uapi/linux/fcntl.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/mount.h | 62 | ||||
| -rw-r--r-- | samples/Kconfig | 9 | ||||
| -rw-r--r-- | samples/Makefile | 2 | ||||
| -rw-r--r-- | samples/vfs/Makefile (renamed from samples/statx/Makefile) | 5 | ||||
| -rw-r--r-- | samples/vfs/test-fsmount.c | 133 | ||||
| -rw-r--r-- | samples/vfs/test-statx.c (renamed from samples/statx/test-statx.c) | 11 | ||||
| -rw-r--r-- | security/security.c | 5 |
22 files changed, 1353 insertions, 91 deletions
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..4cd5f982b1e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl | |||
| @@ -398,7 +398,12 @@ | |||
| 398 | 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl | 398 | 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl |
| 399 | 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents | 399 | 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents |
| 400 | 386 i386 rseq sys_rseq __ia32_sys_rseq | 400 | 386 i386 rseq sys_rseq __ia32_sys_rseq |
| 401 | # don't use numbers 387 through 392, add new calls at the end | 401 | 387 i386 open_tree sys_open_tree __ia32_sys_open_tree |
| 402 | 388 i386 move_mount sys_move_mount __ia32_sys_move_mount | ||
| 403 | 389 i386 fsopen sys_fsopen __ia32_sys_fsopen | ||
| 404 | 390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig | ||
| 405 | 391 i386 fsmount sys_fsmount __ia32_sys_fsmount | ||
| 406 | 392 i386 fspick sys_fspick __ia32_sys_fspick | ||
| 402 | 393 i386 semget sys_semget __ia32_sys_semget | 407 | 393 i386 semget sys_semget __ia32_sys_semget |
| 403 | 394 i386 semctl sys_semctl __ia32_compat_sys_semctl | 408 | 394 i386 semctl sys_semctl __ia32_compat_sys_semctl |
| 404 | 395 i386 shmget sys_shmget __ia32_sys_shmget | 409 | 395 i386 shmget sys_shmget __ia32_sys_shmget |
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..64ca0d06259a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl | |||
| @@ -343,6 +343,12 @@ | |||
| 343 | 332 common statx __x64_sys_statx | 343 | 332 common statx __x64_sys_statx |
| 344 | 333 common io_pgetevents __x64_sys_io_pgetevents | 344 | 333 common io_pgetevents __x64_sys_io_pgetevents |
| 345 | 334 common rseq __x64_sys_rseq | 345 | 334 common rseq __x64_sys_rseq |
| 346 | 335 common open_tree __x64_sys_open_tree | ||
| 347 | 336 common move_mount __x64_sys_move_mount | ||
| 348 | 337 common fsopen __x64_sys_fsopen | ||
| 349 | 338 common fsconfig __x64_sys_fsconfig | ||
| 350 | 339 common fsmount __x64_sys_fsmount | ||
| 351 | 340 common fspick __x64_sys_fspick | ||
| 346 | # don't use numbers 387 through 423, add new calls after the last | 352 | # don't use numbers 387 through 423, add new calls after the last |
| 347 | # 'common' entry | 353 | # 'common' entry |
| 348 | 424 common pidfd_send_signal __x64_sys_pidfd_send_signal | 354 | 424 common pidfd_send_signal __x64_sys_pidfd_send_signal |
diff --git a/fs/Makefile b/fs/Makefile index 35945f8139e6..5a51bc2489ba 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
| 13 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 13 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
| 14 | pnode.o splice.o sync.o utimes.o d_path.o \ | 14 | pnode.o splice.o sync.o utimes.o d_path.o \ |
| 15 | stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ | 15 | stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ |
| 16 | fs_types.o fs_context.o fs_parser.o | 16 | fs_types.o fs_context.o fs_parser.o fsopen.o |
| 17 | 17 | ||
| 18 | ifeq ($(CONFIG_BLOCK),y) | 18 | ifeq ($(CONFIG_BLOCK),y) |
| 19 | obj-y += buffer.o block_dev.o direct-io.o mpage.o | 19 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
diff --git a/fs/file_table.c b/fs/file_table.c index 155d7514a094..3f9c1b452c1d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
| @@ -255,6 +255,7 @@ static void __fput(struct file *file) | |||
| 255 | struct dentry *dentry = file->f_path.dentry; | 255 | struct dentry *dentry = file->f_path.dentry; |
| 256 | struct vfsmount *mnt = file->f_path.mnt; | 256 | struct vfsmount *mnt = file->f_path.mnt; |
| 257 | struct inode *inode = file->f_inode; | 257 | struct inode *inode = file->f_inode; |
| 258 | fmode_t mode = file->f_mode; | ||
| 258 | 259 | ||
| 259 | if (unlikely(!(file->f_mode & FMODE_OPENED))) | 260 | if (unlikely(!(file->f_mode & FMODE_OPENED))) |
| 260 | goto out; | 261 | goto out; |
| @@ -277,18 +278,20 @@ static void __fput(struct file *file) | |||
| 277 | if (file->f_op->release) | 278 | if (file->f_op->release) |
| 278 | file->f_op->release(inode, file); | 279 | file->f_op->release(inode, file); |
| 279 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && | 280 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && |
| 280 | !(file->f_mode & FMODE_PATH))) { | 281 | !(mode & FMODE_PATH))) { |
| 281 | cdev_put(inode->i_cdev); | 282 | cdev_put(inode->i_cdev); |
| 282 | } | 283 | } |
| 283 | fops_put(file->f_op); | 284 | fops_put(file->f_op); |
| 284 | put_pid(file->f_owner.pid); | 285 | put_pid(file->f_owner.pid); |
| 285 | if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) | 286 | if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
| 286 | i_readcount_dec(inode); | 287 | i_readcount_dec(inode); |
| 287 | if (file->f_mode & FMODE_WRITER) { | 288 | if (mode & FMODE_WRITER) { |
| 288 | put_write_access(inode); | 289 | put_write_access(inode); |
| 289 | __mnt_drop_write(mnt); | 290 | __mnt_drop_write(mnt); |
| 290 | } | 291 | } |
| 291 | dput(dentry); | 292 | dput(dentry); |
| 293 | if (unlikely(mode & FMODE_NEED_UNMOUNT)) | ||
| 294 | dissolve_on_fput(mnt); | ||
| 292 | mntput(mnt); | 295 | mntput(mnt); |
| 293 | out: | 296 | out: |
| 294 | file_free(file); | 297 | file_free(file); |
diff --git a/fs/fs_context.c b/fs/fs_context.c index 87e3546b9a52..a47ccd5a4a78 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | */ | 11 | */ |
| 12 | 12 | ||
| 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 14 | #include <linux/module.h> | ||
| 14 | #include <linux/fs_context.h> | 15 | #include <linux/fs_context.h> |
| 15 | #include <linux/fs_parser.h> | 16 | #include <linux/fs_parser.h> |
| 16 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| @@ -23,6 +24,7 @@ | |||
| 23 | #include <linux/pid_namespace.h> | 24 | #include <linux/pid_namespace.h> |
| 24 | #include <linux/user_namespace.h> | 25 | #include <linux/user_namespace.h> |
| 25 | #include <net/net_namespace.h> | 26 | #include <net/net_namespace.h> |
| 27 | #include <asm/sections.h> | ||
| 26 | #include "mount.h" | 28 | #include "mount.h" |
| 27 | #include "internal.h" | 29 | #include "internal.h" |
| 28 | 30 | ||
| @@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, | |||
| 271 | fc->cred = get_current_cred(); | 273 | fc->cred = get_current_cred(); |
| 272 | fc->net_ns = get_net(current->nsproxy->net_ns); | 274 | fc->net_ns = get_net(current->nsproxy->net_ns); |
| 273 | 275 | ||
| 276 | mutex_init(&fc->uapi_mutex); | ||
| 277 | |||
| 274 | switch (purpose) { | 278 | switch (purpose) { |
| 275 | case FS_CONTEXT_FOR_MOUNT: | 279 | case FS_CONTEXT_FOR_MOUNT: |
| 276 | fc->user_ns = get_user_ns(fc->cred->user_ns); | 280 | fc->user_ns = get_user_ns(fc->cred->user_ns); |
| @@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) | |||
| 353 | if (!fc) | 357 | if (!fc) |
| 354 | return ERR_PTR(-ENOMEM); | 358 | return ERR_PTR(-ENOMEM); |
| 355 | 359 | ||
| 360 | mutex_init(&fc->uapi_mutex); | ||
| 361 | |||
| 356 | fc->fs_private = NULL; | 362 | fc->fs_private = NULL; |
| 357 | fc->s_fs_info = NULL; | 363 | fc->s_fs_info = NULL; |
| 358 | fc->source = NULL; | 364 | fc->source = NULL; |
| @@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) | |||
| 361 | get_net(fc->net_ns); | 367 | get_net(fc->net_ns); |
| 362 | get_user_ns(fc->user_ns); | 368 | get_user_ns(fc->user_ns); |
| 363 | get_cred(fc->cred); | 369 | get_cred(fc->cred); |
| 370 | if (fc->log) | ||
| 371 | refcount_inc(&fc->log->usage); | ||
| 364 | 372 | ||
| 365 | /* Can't call put until we've called ->dup */ | 373 | /* Can't call put until we've called ->dup */ |
| 366 | ret = fc->ops->dup(fc, src_fc); | 374 | ret = fc->ops->dup(fc, src_fc); |
| @@ -378,7 +386,6 @@ err_fc: | |||
| 378 | } | 386 | } |
| 379 | EXPORT_SYMBOL(vfs_dup_fs_context); | 387 | EXPORT_SYMBOL(vfs_dup_fs_context); |
| 380 | 388 | ||
| 381 | #ifdef CONFIG_PRINTK | ||
| 382 | /** | 389 | /** |
| 383 | * logfc - Log a message to a filesystem context | 390 | * logfc - Log a message to a filesystem context |
| 384 | * @fc: The filesystem context to log to. | 391 | * @fc: The filesystem context to log to. |
| @@ -386,27 +393,100 @@ EXPORT_SYMBOL(vfs_dup_fs_context); | |||
| 386 | */ | 393 | */ |
| 387 | void logfc(struct fs_context *fc, const char *fmt, ...) | 394 | void logfc(struct fs_context *fc, const char *fmt, ...) |
| 388 | { | 395 | { |
| 396 | static const char store_failure[] = "OOM: Can't store error string"; | ||
| 397 | struct fc_log *log = fc ? fc->log : NULL; | ||
| 398 | const char *p; | ||
| 389 | va_list va; | 399 | va_list va; |
| 400 | char *q; | ||
| 401 | u8 freeable; | ||
| 390 | 402 | ||
| 391 | va_start(va, fmt); | 403 | va_start(va, fmt); |
| 392 | 404 | if (!strchr(fmt, '%')) { | |
| 393 | switch (fmt[0]) { | 405 | p = fmt; |
| 394 | case 'w': | 406 | goto unformatted_string; |
| 395 | vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va); | 407 | } |
| 396 | break; | 408 | if (strcmp(fmt, "%s") == 0) { |
| 397 | case 'e': | 409 | p = va_arg(va, const char *); |
| 398 | vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va); | 410 | goto unformatted_string; |
| 399 | break; | ||
| 400 | default: | ||
| 401 | vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va); | ||
| 402 | break; | ||
| 403 | } | 411 | } |
| 404 | 412 | ||
| 405 | pr_cont("\n"); | 413 | q = kvasprintf(GFP_KERNEL, fmt, va); |
| 414 | copied_string: | ||
| 415 | if (!q) | ||
| 416 | goto store_failure; | ||
| 417 | freeable = 1; | ||
| 418 | goto store_string; | ||
| 419 | |||
| 420 | unformatted_string: | ||
| 421 | if ((unsigned long)p >= (unsigned long)__start_rodata && | ||
| 422 | (unsigned long)p < (unsigned long)__end_rodata) | ||
| 423 | goto const_string; | ||
| 424 | if (log && within_module_core((unsigned long)p, log->owner)) | ||
| 425 | goto const_string; | ||
| 426 | q = kstrdup(p, GFP_KERNEL); | ||
| 427 | goto copied_string; | ||
| 428 | |||
| 429 | store_failure: | ||
| 430 | p = store_failure; | ||
| 431 | const_string: | ||
| 432 | q = (char *)p; | ||
| 433 | freeable = 0; | ||
| 434 | store_string: | ||
| 435 | if (!log) { | ||
| 436 | switch (fmt[0]) { | ||
| 437 | case 'w': | ||
| 438 | printk(KERN_WARNING "%s\n", q + 2); | ||
| 439 | break; | ||
| 440 | case 'e': | ||
| 441 | printk(KERN_ERR "%s\n", q + 2); | ||
| 442 | break; | ||
| 443 | default: | ||
| 444 | printk(KERN_NOTICE "%s\n", q + 2); | ||
| 445 | break; | ||
| 446 | } | ||
| 447 | if (freeable) | ||
| 448 | kfree(q); | ||
| 449 | } else { | ||
| 450 | unsigned int logsize = ARRAY_SIZE(log->buffer); | ||
| 451 | u8 index; | ||
| 452 | |||
| 453 | index = log->head & (logsize - 1); | ||
| 454 | BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) || | ||
| 455 | sizeof(log->tail) != sizeof(u8)); | ||
| 456 | if ((u8)(log->head - log->tail) == logsize) { | ||
| 457 | /* The buffer is full, discard the oldest message */ | ||
| 458 | if (log->need_free & (1 << index)) | ||
| 459 | kfree(log->buffer[index]); | ||
| 460 | log->tail++; | ||
| 461 | } | ||
| 462 | |||
| 463 | log->buffer[index] = q; | ||
| 464 | log->need_free &= ~(1 << index); | ||
| 465 | log->need_free |= freeable << index; | ||
| 466 | log->head++; | ||
| 467 | } | ||
| 406 | va_end(va); | 468 | va_end(va); |
| 407 | } | 469 | } |
| 408 | EXPORT_SYMBOL(logfc); | 470 | EXPORT_SYMBOL(logfc); |
| 409 | #endif | 471 | |
| 472 | /* | ||
| 473 | * Free a logging structure. | ||
| 474 | */ | ||
| 475 | static void put_fc_log(struct fs_context *fc) | ||
| 476 | { | ||
| 477 | struct fc_log *log = fc->log; | ||
| 478 | int i; | ||
| 479 | |||
| 480 | if (log) { | ||
| 481 | if (refcount_dec_and_test(&log->usage)) { | ||
| 482 | fc->log = NULL; | ||
| 483 | for (i = 0; i <= 7; i++) | ||
| 484 | if (log->need_free & (1 << i)) | ||
| 485 | kfree(log->buffer[i]); | ||
| 486 | kfree(log); | ||
| 487 | } | ||
| 488 | } | ||
| 489 | } | ||
| 410 | 490 | ||
| 411 | /** | 491 | /** |
| 412 | * put_fs_context - Dispose of a superblock configuration context. | 492 | * put_fs_context - Dispose of a superblock configuration context. |
| @@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc) | |||
| 431 | put_user_ns(fc->user_ns); | 511 | put_user_ns(fc->user_ns); |
| 432 | put_cred(fc->cred); | 512 | put_cred(fc->cred); |
| 433 | kfree(fc->subtype); | 513 | kfree(fc->subtype); |
| 514 | put_fc_log(fc); | ||
| 434 | put_filesystem(fc->fs_type); | 515 | put_filesystem(fc->fs_type); |
| 435 | kfree(fc->source); | 516 | kfree(fc->source); |
| 436 | kfree(fc); | 517 | kfree(fc); |
| @@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data) | |||
| 640 | 721 | ||
| 641 | return monolithic_mount_data(fc, data); | 722 | return monolithic_mount_data(fc, data); |
| 642 | } | 723 | } |
| 724 | |||
| 725 | /* | ||
| 726 | * Clean up a context after performing an action on it and put it into a state | ||
| 727 | * from where it can be used to reconfigure a superblock. | ||
| 728 | * | ||
| 729 | * Note that here we do only the parts that can't fail; the rest is in | ||
| 730 | * finish_clean_context() below and in between those fs_context is marked | ||
| 731 | * FS_CONTEXT_AWAITING_RECONF. The reason for the split is that after | ||
| 732 | * successful mount or remount we need to report success to userland. | ||
| 733 | * Trying to do full reinit (for the sake of possible subsequent remount) | ||
| 734 | * and failing to allocate memory would've put us into a nasty situation. | ||
| 735 | * So here we only discard the old state and reinitialization is left | ||
| 736 | * until we actually try to reconfigure. | ||
| 737 | */ | ||
| 738 | void vfs_clean_context(struct fs_context *fc) | ||
| 739 | { | ||
| 740 | if (fc->need_free && fc->ops && fc->ops->free) | ||
| 741 | fc->ops->free(fc); | ||
| 742 | fc->need_free = false; | ||
| 743 | fc->fs_private = NULL; | ||
| 744 | fc->s_fs_info = NULL; | ||
| 745 | fc->sb_flags = 0; | ||
| 746 | security_free_mnt_opts(&fc->security); | ||
| 747 | kfree(fc->subtype); | ||
| 748 | fc->subtype = NULL; | ||
| 749 | kfree(fc->source); | ||
| 750 | fc->source = NULL; | ||
| 751 | |||
| 752 | fc->purpose = FS_CONTEXT_FOR_RECONFIGURE; | ||
| 753 | fc->phase = FS_CONTEXT_AWAITING_RECONF; | ||
| 754 | } | ||
| 755 | |||
| 756 | int finish_clean_context(struct fs_context *fc) | ||
| 757 | { | ||
| 758 | int error; | ||
| 759 | |||
| 760 | if (fc->phase != FS_CONTEXT_AWAITING_RECONF) | ||
| 761 | return 0; | ||
| 762 | |||
| 763 | if (fc->fs_type->init_fs_context) | ||
| 764 | error = fc->fs_type->init_fs_context(fc); | ||
| 765 | else | ||
| 766 | error = legacy_init_fs_context(fc); | ||
| 767 | if (unlikely(error)) { | ||
| 768 | fc->phase = FS_CONTEXT_FAILED; | ||
| 769 | return error; | ||
| 770 | } | ||
| 771 | fc->need_free = true; | ||
| 772 | fc->phase = FS_CONTEXT_RECONF_PARAMS; | ||
| 773 | return 0; | ||
| 774 | } | ||
diff --git a/fs/fsopen.c b/fs/fsopen.c new file mode 100644 index 000000000000..3bb9c0c8cbcc --- /dev/null +++ b/fs/fsopen.c | |||
| @@ -0,0 +1,477 @@ | |||
| 1 | /* Filesystem access-by-fd. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/fs_context.h> | ||
| 13 | #include <linux/fs_parser.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | #include <linux/uaccess.h> | ||
| 16 | #include <linux/syscalls.h> | ||
| 17 | #include <linux/security.h> | ||
| 18 | #include <linux/anon_inodes.h> | ||
| 19 | #include <linux/namei.h> | ||
| 20 | #include <linux/file.h> | ||
| 21 | #include <uapi/linux/mount.h> | ||
| 22 | #include "internal.h" | ||
| 23 | #include "mount.h" | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Allow the user to read back any error, warning or informational messages. | ||
| 27 | */ | ||
| 28 | static ssize_t fscontext_read(struct file *file, | ||
| 29 | char __user *_buf, size_t len, loff_t *pos) | ||
| 30 | { | ||
| 31 | struct fs_context *fc = file->private_data; | ||
| 32 | struct fc_log *log = fc->log; | ||
| 33 | unsigned int logsize = ARRAY_SIZE(log->buffer); | ||
| 34 | ssize_t ret; | ||
| 35 | char *p; | ||
| 36 | bool need_free; | ||
| 37 | int index, n; | ||
| 38 | |||
| 39 | ret = mutex_lock_interruptible(&fc->uapi_mutex); | ||
| 40 | if (ret < 0) | ||
| 41 | return ret; | ||
| 42 | |||
| 43 | if (log->head == log->tail) { | ||
| 44 | mutex_unlock(&fc->uapi_mutex); | ||
| 45 | return -ENODATA; | ||
| 46 | } | ||
| 47 | |||
| 48 | index = log->tail & (logsize - 1); | ||
| 49 | p = log->buffer[index]; | ||
| 50 | need_free = log->need_free & (1 << index); | ||
| 51 | log->buffer[index] = NULL; | ||
| 52 | log->need_free &= ~(1 << index); | ||
| 53 | log->tail++; | ||
| 54 | mutex_unlock(&fc->uapi_mutex); | ||
| 55 | |||
| 56 | ret = -EMSGSIZE; | ||
| 57 | n = strlen(p); | ||
| 58 | if (n > len) | ||
| 59 | goto err_free; | ||
| 60 | ret = -EFAULT; | ||
| 61 | if (copy_to_user(_buf, p, n) != 0) | ||
| 62 | goto err_free; | ||
| 63 | ret = n; | ||
| 64 | |||
| 65 | err_free: | ||
| 66 | if (need_free) | ||
| 67 | kfree(p); | ||
| 68 | return ret; | ||
| 69 | } | ||
| 70 | |||
| 71 | static int fscontext_release(struct inode *inode, struct file *file) | ||
| 72 | { | ||
| 73 | struct fs_context *fc = file->private_data; | ||
| 74 | |||
| 75 | if (fc) { | ||
| 76 | file->private_data = NULL; | ||
| 77 | put_fs_context(fc); | ||
| 78 | } | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | |||
| 82 | const struct file_operations fscontext_fops = { | ||
| 83 | .read = fscontext_read, | ||
| 84 | .release = fscontext_release, | ||
| 85 | .llseek = no_llseek, | ||
| 86 | }; | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Attach a filesystem context to a file and an fd. | ||
| 90 | */ | ||
| 91 | static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags) | ||
| 92 | { | ||
| 93 | int fd; | ||
| 94 | |||
| 95 | fd = anon_inode_getfd("fscontext", &fscontext_fops, fc, | ||
| 96 | O_RDWR | o_flags); | ||
| 97 | if (fd < 0) | ||
| 98 | put_fs_context(fc); | ||
| 99 | return fd; | ||
| 100 | } | ||
| 101 | |||
| 102 | static int fscontext_alloc_log(struct fs_context *fc) | ||
| 103 | { | ||
| 104 | fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL); | ||
| 105 | if (!fc->log) | ||
| 106 | return -ENOMEM; | ||
| 107 | refcount_set(&fc->log->usage, 1); | ||
| 108 | fc->log->owner = fc->fs_type->owner; | ||
| 109 | return 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Open a filesystem by name so that it can be configured for mounting. | ||
| 114 | * | ||
| 115 | * We are allowed to specify a container in which the filesystem will be | ||
| 116 | * opened, thereby indicating which namespaces will be used (notably, which | ||
| 117 | * network namespace will be used for network filesystems). | ||
| 118 | */ | ||
| 119 | SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags) | ||
| 120 | { | ||
| 121 | struct file_system_type *fs_type; | ||
| 122 | struct fs_context *fc; | ||
| 123 | const char *fs_name; | ||
| 124 | int ret; | ||
| 125 | |||
| 126 | if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) | ||
| 127 | return -EPERM; | ||
| 128 | |||
| 129 | if (flags & ~FSOPEN_CLOEXEC) | ||
| 130 | return -EINVAL; | ||
| 131 | |||
| 132 | fs_name = strndup_user(_fs_name, PAGE_SIZE); | ||
| 133 | if (IS_ERR(fs_name)) | ||
| 134 | return PTR_ERR(fs_name); | ||
| 135 | |||
| 136 | fs_type = get_fs_type(fs_name); | ||
| 137 | kfree(fs_name); | ||
| 138 | if (!fs_type) | ||
| 139 | return -ENODEV; | ||
| 140 | |||
| 141 | fc = fs_context_for_mount(fs_type, 0); | ||
| 142 | put_filesystem(fs_type); | ||
| 143 | if (IS_ERR(fc)) | ||
| 144 | return PTR_ERR(fc); | ||
| 145 | |||
| 146 | fc->phase = FS_CONTEXT_CREATE_PARAMS; | ||
| 147 | |||
| 148 | ret = fscontext_alloc_log(fc); | ||
| 149 | if (ret < 0) | ||
| 150 | goto err_fc; | ||
| 151 | |||
| 152 | return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0); | ||
| 153 | |||
| 154 | err_fc: | ||
| 155 | put_fs_context(fc); | ||
| 156 | return ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Pick a superblock into a context for reconfiguration. | ||
| 161 | */ | ||
| 162 | SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags) | ||
| 163 | { | ||
| 164 | struct fs_context *fc; | ||
| 165 | struct path target; | ||
| 166 | unsigned int lookup_flags; | ||
| 167 | int ret; | ||
| 168 | |||
| 169 | if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) | ||
| 170 | return -EPERM; | ||
| 171 | |||
| 172 | if ((flags & ~(FSPICK_CLOEXEC | | ||
| 173 | FSPICK_SYMLINK_NOFOLLOW | | ||
| 174 | FSPICK_NO_AUTOMOUNT | | ||
| 175 | FSPICK_EMPTY_PATH)) != 0) | ||
| 176 | return -EINVAL; | ||
| 177 | |||
| 178 | lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT; | ||
| 179 | if (flags & FSPICK_SYMLINK_NOFOLLOW) | ||
| 180 | lookup_flags &= ~LOOKUP_FOLLOW; | ||
| 181 | if (flags & FSPICK_NO_AUTOMOUNT) | ||
| 182 | lookup_flags &= ~LOOKUP_AUTOMOUNT; | ||
| 183 | if (flags & FSPICK_EMPTY_PATH) | ||
| 184 | lookup_flags |= LOOKUP_EMPTY; | ||
| 185 | ret = user_path_at(dfd, path, lookup_flags, &target); | ||
| 186 | if (ret < 0) | ||
| 187 | goto err; | ||
| 188 | |||
| 189 | ret = -EINVAL; | ||
| 190 | if (target.mnt->mnt_root != target.dentry) | ||
| 191 | goto err_path; | ||
| 192 | |||
| 193 | fc = fs_context_for_reconfigure(target.dentry, 0, 0); | ||
| 194 | if (IS_ERR(fc)) { | ||
| 195 | ret = PTR_ERR(fc); | ||
| 196 | goto err_path; | ||
| 197 | } | ||
| 198 | |||
| 199 | fc->phase = FS_CONTEXT_RECONF_PARAMS; | ||
| 200 | |||
| 201 | ret = fscontext_alloc_log(fc); | ||
| 202 | if (ret < 0) | ||
| 203 | goto err_fc; | ||
| 204 | |||
| 205 | path_put(&target); | ||
| 206 | return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0); | ||
| 207 | |||
| 208 | err_fc: | ||
| 209 | put_fs_context(fc); | ||
| 210 | err_path: | ||
| 211 | path_put(&target); | ||
| 212 | err: | ||
| 213 | return ret; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* | ||
| 217 | * Check the state and apply the configuration. Note that this function is | ||
| 218 | * allowed to 'steal' the value by setting param->xxx to NULL before returning. | ||
| 219 | */ | ||
| 220 | static int vfs_fsconfig_locked(struct fs_context *fc, int cmd, | ||
| 221 | struct fs_parameter *param) | ||
| 222 | { | ||
| 223 | struct super_block *sb; | ||
| 224 | int ret; | ||
| 225 | |||
| 226 | ret = finish_clean_context(fc); | ||
| 227 | if (ret) | ||
| 228 | return ret; | ||
| 229 | switch (cmd) { | ||
| 230 | case FSCONFIG_CMD_CREATE: | ||
| 231 | if (fc->phase != FS_CONTEXT_CREATE_PARAMS) | ||
| 232 | return -EBUSY; | ||
| 233 | fc->phase = FS_CONTEXT_CREATING; | ||
| 234 | ret = vfs_get_tree(fc); | ||
| 235 | if (ret) | ||
| 236 | break; | ||
| 237 | sb = fc->root->d_sb; | ||
| 238 | ret = security_sb_kern_mount(sb); | ||
| 239 | if (unlikely(ret)) { | ||
| 240 | fc_drop_locked(fc); | ||
| 241 | break; | ||
| 242 | } | ||
| 243 | up_write(&sb->s_umount); | ||
| 244 | fc->phase = FS_CONTEXT_AWAITING_MOUNT; | ||
| 245 | return 0; | ||
| 246 | case FSCONFIG_CMD_RECONFIGURE: | ||
| 247 | if (fc->phase != FS_CONTEXT_RECONF_PARAMS) | ||
| 248 | return -EBUSY; | ||
| 249 | fc->phase = FS_CONTEXT_RECONFIGURING; | ||
| 250 | sb = fc->root->d_sb; | ||
| 251 | if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { | ||
| 252 | ret = -EPERM; | ||
| 253 | break; | ||
| 254 | } | ||
| 255 | down_write(&sb->s_umount); | ||
| 256 | ret = reconfigure_super(fc); | ||
| 257 | up_write(&sb->s_umount); | ||
| 258 | if (ret) | ||
| 259 | break; | ||
| 260 | vfs_clean_context(fc); | ||
| 261 | return 0; | ||
| 262 | default: | ||
| 263 | if (fc->phase != FS_CONTEXT_CREATE_PARAMS && | ||
| 264 | fc->phase != FS_CONTEXT_RECONF_PARAMS) | ||
| 265 | return -EBUSY; | ||
| 266 | |||
| 267 | return vfs_parse_fs_param(fc, param); | ||
| 268 | } | ||
| 269 | fc->phase = FS_CONTEXT_FAILED; | ||
| 270 | return ret; | ||
| 271 | } | ||
| 272 | |||
| 273 | /** | ||
| 274 | * sys_fsconfig - Set parameters and trigger actions on a context | ||
| 275 | * @fd: The filesystem context to act upon | ||
| 276 | * @cmd: The action to take | ||
| 277 | * @_key: Where appropriate, the parameter key to set | ||
| 278 | * @_value: Where appropriate, the parameter value to set | ||
| 279 | * @aux: Additional information for the value | ||
| 280 | * | ||
| 281 | * This system call is used to set parameters on a context, including | ||
| 282 | * superblock settings, data source and security labelling. | ||
| 283 | * | ||
| 284 | * Actions include triggering the creation of a superblock and the | ||
| 285 | * reconfiguration of the superblock attached to the specified context. | ||
| 286 | * | ||
| 287 | * When setting a parameter, @cmd indicates the type of value being proposed | ||
| 288 | * and @_key indicates the parameter to be altered. | ||
| 289 | * | ||
| 290 | * @_value and @aux are used to specify the value, should a value be required: | ||
| 291 | * | ||
| 292 | * (*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be boolean | ||
| 293 | * in nature. The key may be prefixed with "no" to invert the | ||
| 294 | * setting. @_value must be NULL and @aux must be 0. | ||
| 295 | * | ||
| 296 | * (*) FSCONFIG_SET_STRING: A string value is specified. The parameter can be | ||
| 297 | * expecting boolean, integer, string or take a path. A conversion to an | ||
| 298 | * appropriate type will be attempted (which may include looking up as a | ||
| 299 | * path). @_value points to a NUL-terminated string and @aux must be 0. | ||
| 300 | * | ||
| 301 | * (*) FSCONFIG_SET_BINARY: A binary blob is specified. @_value points to the | ||
| 302 | * blob and @aux indicates its size. The parameter must be expecting a | ||
| 303 | * blob. | ||
| 304 | * | ||
| 305 | * (*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must be | ||
| 306 | * expecting a path object. @_value points to a NUL-terminated string that | ||
| 307 | * is the path and @aux is a file descriptor at which to start a relative | ||
| 308 | * lookup or AT_FDCWD. | ||
| 309 | * | ||
| 310 | * (*) FSCONFIG_SET_PATH_EMPTY: As FSCONFIG_SET_PATH, but with AT_EMPTY_PATH | ||
| 311 | * implied. | ||
| 312 | * | ||
| 313 | * (*) FSCONFIG_SET_FD: An open file descriptor is specified. @_value must be | ||
| 314 | * NULL and @aux indicates the file descriptor. | ||
| 315 | */ | ||
| 316 | SYSCALL_DEFINE5(fsconfig, | ||
| 317 | int, fd, | ||
| 318 | unsigned int, cmd, | ||
| 319 | const char __user *, _key, | ||
| 320 | const void __user *, _value, | ||
| 321 | int, aux) | ||
| 322 | { | ||
| 323 | struct fs_context *fc; | ||
| 324 | struct fd f; | ||
| 325 | int ret; | ||
| 326 | |||
| 327 | struct fs_parameter param = { | ||
| 328 | .type = fs_value_is_undefined, | ||
| 329 | }; | ||
| 330 | |||
| 331 | if (fd < 0) | ||
| 332 | return -EINVAL; | ||
| 333 | |||
| 334 | switch (cmd) { | ||
| 335 | case FSCONFIG_SET_FLAG: | ||
| 336 | if (!_key || _value || aux) | ||
| 337 | return -EINVAL; | ||
| 338 | break; | ||
| 339 | case FSCONFIG_SET_STRING: | ||
| 340 | if (!_key || !_value || aux) | ||
| 341 | return -EINVAL; | ||
| 342 | break; | ||
| 343 | case FSCONFIG_SET_BINARY: | ||
| 344 | if (!_key || !_value || aux <= 0 || aux > 1024 * 1024) | ||
| 345 | return -EINVAL; | ||
| 346 | break; | ||
| 347 | case FSCONFIG_SET_PATH: | ||
| 348 | case FSCONFIG_SET_PATH_EMPTY: | ||
| 349 | if (!_key || !_value || (aux != AT_FDCWD && aux < 0)) | ||
| 350 | return -EINVAL; | ||
| 351 | break; | ||
| 352 | case FSCONFIG_SET_FD: | ||
| 353 | if (!_key || _value || aux < 0) | ||
| 354 | return -EINVAL; | ||
| 355 | break; | ||
| 356 | case FSCONFIG_CMD_CREATE: | ||
| 357 | case FSCONFIG_CMD_RECONFIGURE: | ||
| 358 | if (_key || _value || aux) | ||
| 359 | return -EINVAL; | ||
| 360 | break; | ||
| 361 | default: | ||
| 362 | return -EOPNOTSUPP; | ||
| 363 | } | ||
| 364 | |||
| 365 | f = fdget(fd); | ||
| 366 | if (!f.file) | ||
| 367 | return -EBADF; | ||
| 368 | ret = -EINVAL; | ||
| 369 | if (f.file->f_op != &fscontext_fops) | ||
| 370 | goto out_f; | ||
| 371 | |||
| 372 | fc = f.file->private_data; | ||
| 373 | if (fc->ops == &legacy_fs_context_ops) { | ||
| 374 | switch (cmd) { | ||
| 375 | case FSCONFIG_SET_BINARY: | ||
| 376 | case FSCONFIG_SET_PATH: | ||
| 377 | case FSCONFIG_SET_PATH_EMPTY: | ||
| 378 | case FSCONFIG_SET_FD: | ||
| 379 | ret = -EOPNOTSUPP; | ||
| 380 | goto out_f; | ||
| 381 | } | ||
| 382 | } | ||
| 383 | |||
| 384 | if (_key) { | ||
| 385 | param.key = strndup_user(_key, 256); | ||
| 386 | if (IS_ERR(param.key)) { | ||
| 387 | ret = PTR_ERR(param.key); | ||
| 388 | goto out_f; | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | switch (cmd) { | ||
| 393 | case FSCONFIG_SET_FLAG: | ||
| 394 | param.type = fs_value_is_flag; | ||
| 395 | break; | ||
| 396 | case FSCONFIG_SET_STRING: | ||
| 397 | param.type = fs_value_is_string; | ||
| 398 | param.string = strndup_user(_value, 256); | ||
| 399 | if (IS_ERR(param.string)) { | ||
| 400 | ret = PTR_ERR(param.string); | ||
| 401 | goto out_key; | ||
| 402 | } | ||
| 403 | param.size = strlen(param.string); | ||
| 404 | break; | ||
| 405 | case FSCONFIG_SET_BINARY: | ||
| 406 | param.type = fs_value_is_blob; | ||
| 407 | param.size = aux; | ||
| 408 | param.blob = memdup_user_nul(_value, aux); | ||
| 409 | if (IS_ERR(param.blob)) { | ||
| 410 | ret = PTR_ERR(param.blob); | ||
| 411 | goto out_key; | ||
| 412 | } | ||
| 413 | break; | ||
| 414 | case FSCONFIG_SET_PATH: | ||
| 415 | param.type = fs_value_is_filename; | ||
| 416 | param.name = getname_flags(_value, 0, NULL); | ||
| 417 | if (IS_ERR(param.name)) { | ||
| 418 | ret = PTR_ERR(param.name); | ||
| 419 | goto out_key; | ||
| 420 | } | ||
| 421 | param.dirfd = aux; | ||
| 422 | param.size = strlen(param.name->name); | ||
| 423 | break; | ||
| 424 | case FSCONFIG_SET_PATH_EMPTY: | ||
| 425 | param.type = fs_value_is_filename_empty; | ||
| 426 | param.name = getname_flags(_value, LOOKUP_EMPTY, NULL); | ||
| 427 | if (IS_ERR(param.name)) { | ||
| 428 | ret = PTR_ERR(param.name); | ||
| 429 | goto out_key; | ||
| 430 | } | ||
| 431 | param.dirfd = aux; | ||
| 432 | param.size = strlen(param.name->name); | ||
| 433 | break; | ||
| 434 | case FSCONFIG_SET_FD: | ||
| 435 | param.type = fs_value_is_file; | ||
| 436 | ret = -EBADF; | ||
| 437 | param.file = fget(aux); | ||
| 438 | if (!param.file) | ||
| 439 | goto out_key; | ||
| 440 | break; | ||
| 441 | default: | ||
| 442 | break; | ||
| 443 | } | ||
| 444 | |||
| 445 | ret = mutex_lock_interruptible(&fc->uapi_mutex); | ||
| 446 | if (ret == 0) { | ||
| 447 | ret = vfs_fsconfig_locked(fc, cmd, ¶m); | ||
| 448 | mutex_unlock(&fc->uapi_mutex); | ||
| 449 | } | ||
| 450 | |||
| 451 | /* Clean up our record of any value that we obtained from | ||
| 452 | * userspace. Note that the value may have been stolen by the LSM or | ||
| 453 | * filesystem, in which case the value pointer will have been cleared. | ||
| 454 | */ | ||
| 455 | switch (cmd) { | ||
| 456 | case FSCONFIG_SET_STRING: | ||
| 457 | case FSCONFIG_SET_BINARY: | ||
| 458 | kfree(param.string); | ||
| 459 | break; | ||
| 460 | case FSCONFIG_SET_PATH: | ||
| 461 | case FSCONFIG_SET_PATH_EMPTY: | ||
| 462 | if (param.name) | ||
| 463 | putname(param.name); | ||
| 464 | break; | ||
| 465 | case FSCONFIG_SET_FD: | ||
| 466 | if (param.file) | ||
| 467 | fput(param.file); | ||
| 468 | break; | ||
| 469 | default: | ||
| 470 | break; | ||
| 471 | } | ||
| 472 | out_key: | ||
| 473 | kfree(param.key); | ||
| 474 | out_f: | ||
| 475 | fdput(f); | ||
| 476 | return ret; | ||
| 477 | } | ||
diff --git a/fs/internal.h b/fs/internal.h index 17a8ae967493..0010889f2e85 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -55,8 +55,11 @@ extern void __init chrdev_init(void); | |||
| 55 | /* | 55 | /* |
| 56 | * fs_context.c | 56 | * fs_context.c |
| 57 | */ | 57 | */ |
| 58 | extern const struct fs_context_operations legacy_fs_context_ops; | ||
| 58 | extern int parse_monolithic_mount_data(struct fs_context *, void *); | 59 | extern int parse_monolithic_mount_data(struct fs_context *, void *); |
| 59 | extern void fc_drop_locked(struct fs_context *); | 60 | extern void fc_drop_locked(struct fs_context *); |
| 61 | extern void vfs_clean_context(struct fs_context *fc); | ||
| 62 | extern int finish_clean_context(struct fs_context *fc); | ||
| 60 | 63 | ||
| 61 | /* | 64 | /* |
| 62 | * namei.c | 65 | * namei.c |
| @@ -92,6 +95,7 @@ extern void __init mnt_init(void); | |||
| 92 | extern int __mnt_want_write_file(struct file *); | 95 | extern int __mnt_want_write_file(struct file *); |
| 93 | extern void __mnt_drop_write_file(struct file *); | 96 | extern void __mnt_drop_write_file(struct file *); |
| 94 | 97 | ||
| 98 | extern void dissolve_on_fput(struct vfsmount *); | ||
| 95 | /* | 99 | /* |
| 96 | * fs_struct.c | 100 | * fs_struct.c |
| 97 | */ | 101 | */ |
diff --git a/fs/namespace.c b/fs/namespace.c index c9cab307fa77..3357c3d65475 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/init.h> /* init_rootfs */ | 20 | #include <linux/init.h> /* init_rootfs */ |
| 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
| 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
| 23 | #include <linux/file.h> | ||
| 23 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
| 24 | #include <linux/proc_ns.h> | 25 | #include <linux/proc_ns.h> |
| 25 | #include <linux/magic.h> | 26 | #include <linux/magic.h> |
| @@ -1832,6 +1833,27 @@ struct vfsmount *collect_mounts(const struct path *path) | |||
| 1832 | return &tree->mnt; | 1833 | return &tree->mnt; |
| 1833 | } | 1834 | } |
| 1834 | 1835 | ||
| 1836 | static void free_mnt_ns(struct mnt_namespace *); | ||
| 1837 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool); | ||
| 1838 | |||
| 1839 | void dissolve_on_fput(struct vfsmount *mnt) | ||
| 1840 | { | ||
| 1841 | struct mnt_namespace *ns; | ||
| 1842 | namespace_lock(); | ||
| 1843 | lock_mount_hash(); | ||
| 1844 | ns = real_mount(mnt)->mnt_ns; | ||
| 1845 | if (ns) { | ||
| 1846 | if (is_anon_ns(ns)) | ||
| 1847 | umount_tree(real_mount(mnt), UMOUNT_CONNECTED); | ||
| 1848 | else | ||
| 1849 | ns = NULL; | ||
| 1850 | } | ||
| 1851 | unlock_mount_hash(); | ||
| 1852 | namespace_unlock(); | ||
| 1853 | if (ns) | ||
| 1854 | free_mnt_ns(ns); | ||
| 1855 | } | ||
| 1856 | |||
| 1835 | void drop_collected_mounts(struct vfsmount *mnt) | 1857 | void drop_collected_mounts(struct vfsmount *mnt) |
| 1836 | { | 1858 | { |
| 1837 | namespace_lock(); | 1859 | namespace_lock(); |
| @@ -2065,6 +2087,10 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
| 2065 | attach_mnt(source_mnt, dest_mnt, dest_mp); | 2087 | attach_mnt(source_mnt, dest_mnt, dest_mp); |
| 2066 | touch_mnt_namespace(source_mnt->mnt_ns); | 2088 | touch_mnt_namespace(source_mnt->mnt_ns); |
| 2067 | } else { | 2089 | } else { |
| 2090 | if (source_mnt->mnt_ns) { | ||
| 2091 | /* move from anon - the caller will destroy */ | ||
| 2092 | list_del_init(&source_mnt->mnt_ns->list); | ||
| 2093 | } | ||
| 2068 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); | 2094 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
| 2069 | commit_tree(source_mnt); | 2095 | commit_tree(source_mnt); |
| 2070 | } | 2096 | } |
| @@ -2222,6 +2248,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry) | |||
| 2222 | return false; | 2248 | return false; |
| 2223 | } | 2249 | } |
| 2224 | 2250 | ||
| 2251 | static struct mount *__do_loopback(struct path *old_path, int recurse) | ||
| 2252 | { | ||
| 2253 | struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); | ||
| 2254 | |||
| 2255 | if (IS_MNT_UNBINDABLE(old)) | ||
| 2256 | return mnt; | ||
| 2257 | |||
| 2258 | if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) | ||
| 2259 | return mnt; | ||
| 2260 | |||
| 2261 | if (!recurse && has_locked_children(old, old_path->dentry)) | ||
| 2262 | return mnt; | ||
| 2263 | |||
| 2264 | if (recurse) | ||
| 2265 | mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); | ||
| 2266 | else | ||
| 2267 | mnt = clone_mnt(old, old_path->dentry, 0); | ||
| 2268 | |||
| 2269 | if (!IS_ERR(mnt)) | ||
| 2270 | mnt->mnt.mnt_flags &= ~MNT_LOCKED; | ||
| 2271 | |||
| 2272 | return mnt; | ||
| 2273 | } | ||
| 2274 | |||
| 2225 | /* | 2275 | /* |
| 2226 | * do loopback mount. | 2276 | * do loopback mount. |
| 2227 | */ | 2277 | */ |
| @@ -2229,7 +2279,7 @@ static int do_loopback(struct path *path, const char *old_name, | |||
| 2229 | int recurse) | 2279 | int recurse) |
| 2230 | { | 2280 | { |
| 2231 | struct path old_path; | 2281 | struct path old_path; |
| 2232 | struct mount *mnt = NULL, *old, *parent; | 2282 | struct mount *mnt = NULL, *parent; |
| 2233 | struct mountpoint *mp; | 2283 | struct mountpoint *mp; |
| 2234 | int err; | 2284 | int err; |
| 2235 | if (!old_name || !*old_name) | 2285 | if (!old_name || !*old_name) |
| @@ -2243,38 +2293,21 @@ static int do_loopback(struct path *path, const char *old_name, | |||
| 2243 | goto out; | 2293 | goto out; |
| 2244 | 2294 | ||
| 2245 | mp = lock_mount(path); | 2295 | mp = lock_mount(path); |
| 2246 | err = PTR_ERR(mp); | 2296 | if (IS_ERR(mp)) { |
| 2247 | if (IS_ERR(mp)) | 2297 | err = PTR_ERR(mp); |
| 2248 | goto out; | 2298 | goto out; |
| 2299 | } | ||
| 2249 | 2300 | ||
| 2250 | old = real_mount(old_path.mnt); | ||
| 2251 | parent = real_mount(path->mnt); | 2301 | parent = real_mount(path->mnt); |
| 2252 | |||
| 2253 | err = -EINVAL; | ||
| 2254 | if (IS_MNT_UNBINDABLE(old)) | ||
| 2255 | goto out2; | ||
| 2256 | |||
| 2257 | if (!check_mnt(parent)) | 2302 | if (!check_mnt(parent)) |
| 2258 | goto out2; | 2303 | goto out2; |
| 2259 | 2304 | ||
| 2260 | if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) | 2305 | mnt = __do_loopback(&old_path, recurse); |
| 2261 | goto out2; | ||
| 2262 | |||
| 2263 | if (!recurse && has_locked_children(old, old_path.dentry)) | ||
| 2264 | goto out2; | ||
| 2265 | |||
| 2266 | if (recurse) | ||
| 2267 | mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); | ||
| 2268 | else | ||
| 2269 | mnt = clone_mnt(old, old_path.dentry, 0); | ||
| 2270 | |||
| 2271 | if (IS_ERR(mnt)) { | 2306 | if (IS_ERR(mnt)) { |
| 2272 | err = PTR_ERR(mnt); | 2307 | err = PTR_ERR(mnt); |
| 2273 | goto out2; | 2308 | goto out2; |
| 2274 | } | 2309 | } |
| 2275 | 2310 | ||
| 2276 | mnt->mnt.mnt_flags &= ~MNT_LOCKED; | ||
| 2277 | |||
| 2278 | err = graft_tree(mnt, parent, mp); | 2311 | err = graft_tree(mnt, parent, mp); |
| 2279 | if (err) { | 2312 | if (err) { |
| 2280 | lock_mount_hash(); | 2313 | lock_mount_hash(); |
| @@ -2288,6 +2321,96 @@ out: | |||
| 2288 | return err; | 2321 | return err; |
| 2289 | } | 2322 | } |
| 2290 | 2323 | ||
| 2324 | static struct file *open_detached_copy(struct path *path, bool recursive) | ||
| 2325 | { | ||
| 2326 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | ||
| 2327 | struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); | ||
| 2328 | struct mount *mnt, *p; | ||
| 2329 | struct file *file; | ||
| 2330 | |||
| 2331 | if (IS_ERR(ns)) | ||
| 2332 | return ERR_CAST(ns); | ||
| 2333 | |||
| 2334 | namespace_lock(); | ||
| 2335 | mnt = __do_loopback(path, recursive); | ||
| 2336 | if (IS_ERR(mnt)) { | ||
| 2337 | namespace_unlock(); | ||
| 2338 | free_mnt_ns(ns); | ||
| 2339 | return ERR_CAST(mnt); | ||
| 2340 | } | ||
| 2341 | |||
| 2342 | lock_mount_hash(); | ||
| 2343 | for (p = mnt; p; p = next_mnt(p, mnt)) { | ||
| 2344 | p->mnt_ns = ns; | ||
| 2345 | ns->mounts++; | ||
| 2346 | } | ||
| 2347 | ns->root = mnt; | ||
| 2348 | list_add_tail(&ns->list, &mnt->mnt_list); | ||
| 2349 | mntget(&mnt->mnt); | ||
| 2350 | unlock_mount_hash(); | ||
| 2351 | namespace_unlock(); | ||
| 2352 | |||
| 2353 | mntput(path->mnt); | ||
| 2354 | path->mnt = &mnt->mnt; | ||
| 2355 | file = dentry_open(path, O_PATH, current_cred()); | ||
| 2356 | if (IS_ERR(file)) | ||
| 2357 | dissolve_on_fput(path->mnt); | ||
| 2358 | else | ||
| 2359 | file->f_mode |= FMODE_NEED_UNMOUNT; | ||
| 2360 | return file; | ||
| 2361 | } | ||
| 2362 | |||
| 2363 | SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags) | ||
| 2364 | { | ||
| 2365 | struct file *file; | ||
| 2366 | struct path path; | ||
| 2367 | int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; | ||
| 2368 | bool detached = flags & OPEN_TREE_CLONE; | ||
| 2369 | int error; | ||
| 2370 | int fd; | ||
| 2371 | |||
| 2372 | BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); | ||
| 2373 | |||
| 2374 | if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | | ||
| 2375 | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | | ||
| 2376 | OPEN_TREE_CLOEXEC)) | ||
| 2377 | return -EINVAL; | ||
| 2378 | |||
| 2379 | if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) | ||
| 2380 | return -EINVAL; | ||
| 2381 | |||
| 2382 | if (flags & AT_NO_AUTOMOUNT) | ||
| 2383 | lookup_flags &= ~LOOKUP_AUTOMOUNT; | ||
| 2384 | if (flags & AT_SYMLINK_NOFOLLOW) | ||
| 2385 | lookup_flags &= ~LOOKUP_FOLLOW; | ||
| 2386 | if (flags & AT_EMPTY_PATH) | ||
| 2387 | lookup_flags |= LOOKUP_EMPTY; | ||
| 2388 | |||
| 2389 | if (detached && !may_mount()) | ||
| 2390 | return -EPERM; | ||
| 2391 | |||
| 2392 | fd = get_unused_fd_flags(flags & O_CLOEXEC); | ||
| 2393 | if (fd < 0) | ||
| 2394 | return fd; | ||
| 2395 | |||
| 2396 | error = user_path_at(dfd, filename, lookup_flags, &path); | ||
| 2397 | if (unlikely(error)) { | ||
| 2398 | file = ERR_PTR(error); | ||
| 2399 | } else { | ||
| 2400 | if (detached) | ||
| 2401 | file = open_detached_copy(&path, flags & AT_RECURSIVE); | ||
| 2402 | else | ||
| 2403 | file = dentry_open(&path, O_PATH, current_cred()); | ||
| 2404 | path_put(&path); | ||
| 2405 | } | ||
| 2406 | if (IS_ERR(file)) { | ||
| 2407 | put_unused_fd(fd); | ||
| 2408 | return PTR_ERR(file); | ||
| 2409 | } | ||
| 2410 | fd_install(fd, file); | ||
| 2411 | return fd; | ||
| 2412 | } | ||
| 2413 | |||
| 2291 | /* | 2414 | /* |
| 2292 | * Don't allow locked mount flags to be cleared. | 2415 | * Don't allow locked mount flags to be cleared. |
| 2293 | * | 2416 | * |
| @@ -2426,72 +2549,117 @@ static inline int tree_contains_unbindable(struct mount *mnt) | |||
| 2426 | return 0; | 2549 | return 0; |
| 2427 | } | 2550 | } |
| 2428 | 2551 | ||
| 2429 | static int do_move_mount(struct path *path, const char *old_name) | 2552 | /* |
| 2553 | * Check that there aren't references to earlier/same mount namespaces in the | ||
| 2554 | * specified subtree. Such references can act as pins for mount namespaces | ||
| 2555 | * that aren't checked by the mount-cycle checking code, thereby allowing | ||
| 2556 | * cycles to be made. | ||
| 2557 | */ | ||
| 2558 | static bool check_for_nsfs_mounts(struct mount *subtree) | ||
| 2430 | { | 2559 | { |
| 2431 | struct path old_path, parent_path; | 2560 | struct mount *p; |
| 2561 | bool ret = false; | ||
| 2562 | |||
| 2563 | lock_mount_hash(); | ||
| 2564 | for (p = subtree; p; p = next_mnt(p, subtree)) | ||
| 2565 | if (mnt_ns_loop(p->mnt.mnt_root)) | ||
| 2566 | goto out; | ||
| 2567 | |||
| 2568 | ret = true; | ||
| 2569 | out: | ||
| 2570 | unlock_mount_hash(); | ||
| 2571 | return ret; | ||
| 2572 | } | ||
| 2573 | |||
| 2574 | static int do_move_mount(struct path *old_path, struct path *new_path) | ||
| 2575 | { | ||
| 2576 | struct path parent_path = {.mnt = NULL, .dentry = NULL}; | ||
| 2577 | struct mnt_namespace *ns; | ||
| 2432 | struct mount *p; | 2578 | struct mount *p; |
| 2433 | struct mount *old; | 2579 | struct mount *old; |
| 2434 | struct mountpoint *mp; | 2580 | struct mountpoint *mp; |
| 2435 | int err; | 2581 | int err; |
| 2436 | if (!old_name || !*old_name) | 2582 | bool attached; |
| 2437 | return -EINVAL; | ||
| 2438 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); | ||
| 2439 | if (err) | ||
| 2440 | return err; | ||
| 2441 | 2583 | ||
| 2442 | mp = lock_mount(path); | 2584 | mp = lock_mount(new_path); |
| 2443 | err = PTR_ERR(mp); | ||
| 2444 | if (IS_ERR(mp)) | 2585 | if (IS_ERR(mp)) |
| 2445 | goto out; | 2586 | return PTR_ERR(mp); |
| 2446 | 2587 | ||
| 2447 | old = real_mount(old_path.mnt); | 2588 | old = real_mount(old_path->mnt); |
| 2448 | p = real_mount(path->mnt); | 2589 | p = real_mount(new_path->mnt); |
| 2590 | attached = mnt_has_parent(old); | ||
| 2591 | ns = old->mnt_ns; | ||
| 2449 | 2592 | ||
| 2450 | err = -EINVAL; | 2593 | err = -EINVAL; |
| 2451 | if (!check_mnt(p) || !check_mnt(old)) | 2594 | /* The mountpoint must be in our namespace. */ |
| 2452 | goto out1; | 2595 | if (!check_mnt(p)) |
| 2596 | goto out; | ||
| 2453 | 2597 | ||
| 2454 | if (old->mnt.mnt_flags & MNT_LOCKED) | 2598 | /* The thing moved should be either ours or completely unattached. */ |
| 2455 | goto out1; | 2599 | if (attached && !check_mnt(old)) |
| 2600 | goto out; | ||
| 2456 | 2601 | ||
| 2457 | err = -EINVAL; | 2602 | if (!attached && !is_anon_ns(ns)) |
| 2458 | if (old_path.dentry != old_path.mnt->mnt_root) | 2603 | goto out; |
| 2459 | goto out1; | ||
| 2460 | 2604 | ||
| 2461 | if (!mnt_has_parent(old)) | 2605 | if (old->mnt.mnt_flags & MNT_LOCKED) |
| 2462 | goto out1; | 2606 | goto out; |
| 2463 | 2607 | ||
| 2464 | if (d_is_dir(path->dentry) != | 2608 | if (old_path->dentry != old_path->mnt->mnt_root) |
| 2465 | d_is_dir(old_path.dentry)) | 2609 | goto out; |
| 2466 | goto out1; | 2610 | |
| 2611 | if (d_is_dir(new_path->dentry) != | ||
| 2612 | d_is_dir(old_path->dentry)) | ||
| 2613 | goto out; | ||
| 2467 | /* | 2614 | /* |
| 2468 | * Don't move a mount residing in a shared parent. | 2615 | * Don't move a mount residing in a shared parent. |
| 2469 | */ | 2616 | */ |
| 2470 | if (IS_MNT_SHARED(old->mnt_parent)) | 2617 | if (attached && IS_MNT_SHARED(old->mnt_parent)) |
| 2471 | goto out1; | 2618 | goto out; |
| 2472 | /* | 2619 | /* |
| 2473 | * Don't move a mount tree containing unbindable mounts to a destination | 2620 | * Don't move a mount tree containing unbindable mounts to a destination |
| 2474 | * mount which is shared. | 2621 | * mount which is shared. |
| 2475 | */ | 2622 | */ |
| 2476 | if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) | 2623 | if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) |
| 2477 | goto out1; | 2624 | goto out; |
| 2478 | err = -ELOOP; | 2625 | err = -ELOOP; |
| 2626 | if (!check_for_nsfs_mounts(old)) | ||
| 2627 | goto out; | ||
| 2479 | for (; mnt_has_parent(p); p = p->mnt_parent) | 2628 | for (; mnt_has_parent(p); p = p->mnt_parent) |
| 2480 | if (p == old) | 2629 | if (p == old) |
| 2481 | goto out1; | 2630 | goto out; |
| 2482 | 2631 | ||
| 2483 | err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); | 2632 | err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, |
| 2633 | attached ? &parent_path : NULL); | ||
| 2484 | if (err) | 2634 | if (err) |
| 2485 | goto out1; | 2635 | goto out; |
| 2486 | 2636 | ||
| 2487 | /* if the mount is moved, it should no longer be expire | 2637 | /* if the mount is moved, it should no longer be expire |
| 2488 | * automatically */ | 2638 | * automatically */ |
| 2489 | list_del_init(&old->mnt_expire); | 2639 | list_del_init(&old->mnt_expire); |
| 2490 | out1: | ||
| 2491 | unlock_mount(mp); | ||
| 2492 | out: | 2640 | out: |
| 2493 | if (!err) | 2641 | unlock_mount(mp); |
| 2642 | if (!err) { | ||
| 2494 | path_put(&parent_path); | 2643 | path_put(&parent_path); |
| 2644 | if (!attached) | ||
| 2645 | free_mnt_ns(ns); | ||
| 2646 | } | ||
| 2647 | return err; | ||
| 2648 | } | ||
| 2649 | |||
| 2650 | static int do_move_mount_old(struct path *path, const char *old_name) | ||
| 2651 | { | ||
| 2652 | struct path old_path; | ||
| 2653 | int err; | ||
| 2654 | |||
| 2655 | if (!old_name || !*old_name) | ||
| 2656 | return -EINVAL; | ||
| 2657 | |||
| 2658 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); | ||
| 2659 | if (err) | ||
| 2660 | return err; | ||
| 2661 | |||
| 2662 | err = do_move_mount(&old_path, path); | ||
| 2495 | path_put(&old_path); | 2663 | path_put(&old_path); |
| 2496 | return err; | 2664 | return err; |
| 2497 | } | 2665 | } |
| @@ -2937,7 +3105,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, | |||
| 2937 | else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) | 3105 | else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
| 2938 | retval = do_change_type(&path, flags); | 3106 | retval = do_change_type(&path, flags); |
| 2939 | else if (flags & MS_MOVE) | 3107 | else if (flags & MS_MOVE) |
| 2940 | retval = do_move_mount(&path, dev_name); | 3108 | retval = do_move_mount_old(&path, dev_name); |
| 2941 | else | 3109 | else |
| 2942 | retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, | 3110 | retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, |
| 2943 | dev_name, data_page); | 3111 | dev_name, data_page); |
| @@ -3166,6 +3334,203 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | |||
| 3166 | } | 3334 | } |
| 3167 | 3335 | ||
| 3168 | /* | 3336 | /* |
| 3337 | * Create a kernel mount representation for a new, prepared superblock | ||
| 3338 | * (specified by fs_fd) and attach to an open_tree-like file descriptor. | ||
| 3339 | */ | ||
| 3340 | SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, | ||
| 3341 | unsigned int, attr_flags) | ||
| 3342 | { | ||
| 3343 | struct mnt_namespace *ns; | ||
| 3344 | struct fs_context *fc; | ||
| 3345 | struct file *file; | ||
| 3346 | struct path newmount; | ||
| 3347 | struct mount *mnt; | ||
| 3348 | struct fd f; | ||
| 3349 | unsigned int mnt_flags = 0; | ||
| 3350 | long ret; | ||
| 3351 | |||
| 3352 | if (!may_mount()) | ||
| 3353 | return -EPERM; | ||
| 3354 | |||
| 3355 | if ((flags & ~(FSMOUNT_CLOEXEC)) != 0) | ||
| 3356 | return -EINVAL; | ||
| 3357 | |||
| 3358 | if (attr_flags & ~(MOUNT_ATTR_RDONLY | | ||
| 3359 | MOUNT_ATTR_NOSUID | | ||
| 3360 | MOUNT_ATTR_NODEV | | ||
| 3361 | MOUNT_ATTR_NOEXEC | | ||
| 3362 | MOUNT_ATTR__ATIME | | ||
| 3363 | MOUNT_ATTR_NODIRATIME)) | ||
| 3364 | return -EINVAL; | ||
| 3365 | |||
| 3366 | if (attr_flags & MOUNT_ATTR_RDONLY) | ||
| 3367 | mnt_flags |= MNT_READONLY; | ||
| 3368 | if (attr_flags & MOUNT_ATTR_NOSUID) | ||
| 3369 | mnt_flags |= MNT_NOSUID; | ||
| 3370 | if (attr_flags & MOUNT_ATTR_NODEV) | ||
| 3371 | mnt_flags |= MNT_NODEV; | ||
| 3372 | if (attr_flags & MOUNT_ATTR_NOEXEC) | ||
| 3373 | mnt_flags |= MNT_NOEXEC; | ||
| 3374 | if (attr_flags & MOUNT_ATTR_NODIRATIME) | ||
| 3375 | mnt_flags |= MNT_NODIRATIME; | ||
| 3376 | |||
| 3377 | switch (attr_flags & MOUNT_ATTR__ATIME) { | ||
| 3378 | case MOUNT_ATTR_STRICTATIME: | ||
| 3379 | break; | ||
| 3380 | case MOUNT_ATTR_NOATIME: | ||
| 3381 | mnt_flags |= MNT_NOATIME; | ||
| 3382 | break; | ||
| 3383 | case MOUNT_ATTR_RELATIME: | ||
| 3384 | mnt_flags |= MNT_RELATIME; | ||
| 3385 | break; | ||
| 3386 | default: | ||
| 3387 | return -EINVAL; | ||
| 3388 | } | ||
| 3389 | |||
| 3390 | f = fdget(fs_fd); | ||
| 3391 | if (!f.file) | ||
| 3392 | return -EBADF; | ||
| 3393 | |||
| 3394 | ret = -EINVAL; | ||
| 3395 | if (f.file->f_op != &fscontext_fops) | ||
| 3396 | goto err_fsfd; | ||
| 3397 | |||
| 3398 | fc = f.file->private_data; | ||
| 3399 | |||
| 3400 | ret = mutex_lock_interruptible(&fc->uapi_mutex); | ||
| 3401 | if (ret < 0) | ||
| 3402 | goto err_fsfd; | ||
| 3403 | |||
| 3404 | /* There must be a valid superblock or we can't mount it */ | ||
| 3405 | ret = -EINVAL; | ||
| 3406 | if (!fc->root) | ||
| 3407 | goto err_unlock; | ||
| 3408 | |||
| 3409 | ret = -EPERM; | ||
| 3410 | if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { | ||
| 3411 | pr_warn("VFS: Mount too revealing\n"); | ||
| 3412 | goto err_unlock; | ||
| 3413 | } | ||
| 3414 | |||
| 3415 | ret = -EBUSY; | ||
| 3416 | if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) | ||
| 3417 | goto err_unlock; | ||
| 3418 | |||
| 3419 | ret = -EPERM; | ||
| 3420 | if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock()) | ||
| 3421 | goto err_unlock; | ||
| 3422 | |||
| 3423 | newmount.mnt = vfs_create_mount(fc); | ||
| 3424 | if (IS_ERR(newmount.mnt)) { | ||
| 3425 | ret = PTR_ERR(newmount.mnt); | ||
| 3426 | goto err_unlock; | ||
| 3427 | } | ||
| 3428 | newmount.dentry = dget(fc->root); | ||
| 3429 | newmount.mnt->mnt_flags = mnt_flags; | ||
| 3430 | |||
| 3431 | /* We've done the mount bit - now move the file context into more or | ||
| 3432 | * less the same state as if we'd done an fspick(). We don't want to | ||
| 3433 | * do any memory allocation or anything like that at this point as we | ||
| 3434 | * don't want to have to handle any errors incurred. | ||
| 3435 | */ | ||
| 3436 | vfs_clean_context(fc); | ||
| 3437 | |||
| 3438 | ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); | ||
| 3439 | if (IS_ERR(ns)) { | ||
| 3440 | ret = PTR_ERR(ns); | ||
| 3441 | goto err_path; | ||
| 3442 | } | ||
| 3443 | mnt = real_mount(newmount.mnt); | ||
| 3444 | mnt->mnt_ns = ns; | ||
| 3445 | ns->root = mnt; | ||
| 3446 | ns->mounts = 1; | ||
| 3447 | list_add(&mnt->mnt_list, &ns->list); | ||
| 3448 | |||
| 3449 | /* Attach to an apparent O_PATH fd with a note that we need to unmount | ||
| 3450 | * it, not just simply put it. | ||
| 3451 | */ | ||
| 3452 | file = dentry_open(&newmount, O_PATH, fc->cred); | ||
| 3453 | if (IS_ERR(file)) { | ||
| 3454 | dissolve_on_fput(newmount.mnt); | ||
| 3455 | ret = PTR_ERR(file); | ||
| 3456 | goto err_path; | ||
| 3457 | } | ||
| 3458 | file->f_mode |= FMODE_NEED_UNMOUNT; | ||
| 3459 | |||
| 3460 | ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0); | ||
| 3461 | if (ret >= 0) | ||
| 3462 | fd_install(ret, file); | ||
| 3463 | else | ||
| 3464 | fput(file); | ||
| 3465 | |||
| 3466 | err_path: | ||
| 3467 | path_put(&newmount); | ||
| 3468 | err_unlock: | ||
| 3469 | mutex_unlock(&fc->uapi_mutex); | ||
| 3470 | err_fsfd: | ||
| 3471 | fdput(f); | ||
| 3472 | return ret; | ||
| 3473 | } | ||
| 3474 | |||
| 3475 | /* | ||
| 3476 | * Move a mount from one place to another. In combination with | ||
| 3477 | * fsopen()/fsmount() this is used to install a new mount and in combination | ||
| 3478 | * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy | ||
| 3479 | * a mount subtree. | ||
| 3480 | * | ||
| 3481 | * Note the flags value is a combination of MOVE_MOUNT_* flags. | ||
| 3482 | */ | ||
| 3483 | SYSCALL_DEFINE5(move_mount, | ||
| 3484 | int, from_dfd, const char *, from_pathname, | ||
| 3485 | int, to_dfd, const char *, to_pathname, | ||
| 3486 | unsigned int, flags) | ||
| 3487 | { | ||
| 3488 | struct path from_path, to_path; | ||
| 3489 | unsigned int lflags; | ||
| 3490 | int ret = 0; | ||
| 3491 | |||
| 3492 | if (!may_mount()) | ||
| 3493 | return -EPERM; | ||
| 3494 | |||
| 3495 | if (flags & ~MOVE_MOUNT__MASK) | ||
| 3496 | return -EINVAL; | ||
| 3497 | |||
| 3498 | /* If someone gives a pathname, they aren't permitted to move | ||
| 3499 | * from an fd that requires unmount as we can't get at the flag | ||
| 3500 | * to clear it afterwards. | ||
| 3501 | */ | ||
| 3502 | lflags = 0; | ||
| 3503 | if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; | ||
| 3504 | if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; | ||
| 3505 | if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; | ||
| 3506 | |||
| 3507 | ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); | ||
| 3508 | if (ret < 0) | ||
| 3509 | return ret; | ||
| 3510 | |||
| 3511 | lflags = 0; | ||
| 3512 | if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; | ||
| 3513 | if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; | ||
| 3514 | if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; | ||
| 3515 | |||
| 3516 | ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); | ||
| 3517 | if (ret < 0) | ||
| 3518 | goto out_from; | ||
| 3519 | |||
| 3520 | ret = security_move_mount(&from_path, &to_path); | ||
| 3521 | if (ret < 0) | ||
| 3522 | goto out_to; | ||
| 3523 | |||
| 3524 | ret = do_move_mount(&from_path, &to_path); | ||
| 3525 | |||
| 3526 | out_to: | ||
| 3527 | path_put(&to_path); | ||
| 3528 | out_from: | ||
| 3529 | path_put(&from_path); | ||
| 3530 | return ret; | ||
| 3531 | } | ||
| 3532 | |||
| 3533 | /* | ||
| 3169 | * Return true if path is reachable from root | 3534 | * Return true if path is reachable from root |
| 3170 | * | 3535 | * |
| 3171 | * namespace_sem or mount_lock is held | 3536 | * namespace_sem or mount_lock is held |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 5174405e40d5..ec07f4c5630d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -165,10 +165,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | |||
| 165 | #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) | 165 | #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) |
| 166 | 166 | ||
| 167 | /* File is capable of returning -EAGAIN if I/O will block */ | 167 | /* File is capable of returning -EAGAIN if I/O will block */ |
| 168 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) | 168 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) |
| 169 | |||
| 170 | /* File represents mount that needs unmounting */ | ||
| 171 | #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) | ||
| 169 | 172 | ||
| 170 | /* File does not contribute to nr_files count */ | 173 | /* File does not contribute to nr_files count */ |
| 171 | #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) | 174 | #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) |
| 172 | 175 | ||
| 173 | /* | 176 | /* |
| 174 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector | 177 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector |
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index eaca452088fa..1f966670c8dc 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h | |||
| @@ -13,8 +13,10 @@ | |||
| 13 | #define _LINUX_FS_CONTEXT_H | 13 | #define _LINUX_FS_CONTEXT_H |
| 14 | 14 | ||
| 15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
| 16 | #include <linux/refcount.h> | ||
| 16 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
| 17 | #include <linux/security.h> | 18 | #include <linux/security.h> |
| 19 | #include <linux/mutex.h> | ||
| 18 | 20 | ||
| 19 | struct cred; | 21 | struct cred; |
| 20 | struct dentry; | 22 | struct dentry; |
| @@ -35,6 +37,19 @@ enum fs_context_purpose { | |||
| 35 | }; | 37 | }; |
| 36 | 38 | ||
| 37 | /* | 39 | /* |
| 40 | * Userspace usage phase for fsopen/fspick. | ||
| 41 | */ | ||
/*
 * Stored in the 8-bit fs_context::phase field.  fsmount() only accepts a
 * context in FS_CONTEXT_AWAITING_MOUNT and fails with -EBUSY otherwise.
 */
enum fs_context_phase {
	FS_CONTEXT_CREATE_PARAMS,	/* Loading params for sb creation */
	FS_CONTEXT_CREATING,		/* A superblock is being created */
	FS_CONTEXT_AWAITING_MOUNT,	/* Superblock created, awaiting fsmount() */
	FS_CONTEXT_AWAITING_RECONF,	/* Awaiting initialisation for reconfiguration */
	FS_CONTEXT_RECONF_PARAMS,	/* Loading params for reconfiguration */
	FS_CONTEXT_RECONFIGURING,	/* Reconfiguring the superblock */
	FS_CONTEXT_FAILED,		/* Failed to correctly transition a context */
};
| 51 | |||
| 52 | /* | ||
| 38 | * Type of parameter value. | 53 | * Type of parameter value. |
| 39 | */ | 54 | */ |
| 40 | enum fs_value_type { | 55 | enum fs_value_type { |
| @@ -74,12 +89,14 @@ struct fs_parameter { | |||
| 74 | */ | 89 | */ |
| 75 | struct fs_context { | 90 | struct fs_context { |
| 76 | const struct fs_context_operations *ops; | 91 | const struct fs_context_operations *ops; |
| 92 | struct mutex uapi_mutex; /* Userspace access mutex */ | ||
| 77 | struct file_system_type *fs_type; | 93 | struct file_system_type *fs_type; |
| 78 | void *fs_private; /* The filesystem's context */ | 94 | void *fs_private; /* The filesystem's context */ |
| 79 | struct dentry *root; /* The root and superblock */ | 95 | struct dentry *root; /* The root and superblock */ |
| 80 | struct user_namespace *user_ns; /* The user namespace for this mount */ | 96 | struct user_namespace *user_ns; /* The user namespace for this mount */ |
| 81 | struct net *net_ns; /* The network namespace for this mount */ | 97 | struct net *net_ns; /* The network namespace for this mount */ |
| 82 | const struct cred *cred; /* The mounter's credentials */ | 98 | const struct cred *cred; /* The mounter's credentials */ |
| 99 | struct fc_log *log; /* Logging buffer */ | ||
| 83 | const char *source; /* The source name (eg. dev path) */ | 100 | const char *source; /* The source name (eg. dev path) */ |
| 84 | const char *subtype; /* The subtype to set on the superblock */ | 101 | const char *subtype; /* The subtype to set on the superblock */ |
| 85 | void *security; /* Linux S&M options */ | 102 | void *security; /* Linux S&M options */ |
| @@ -88,6 +105,7 @@ struct fs_context { | |||
| 88 | unsigned int sb_flags_mask; /* Superblock flags that were changed */ | 105 | unsigned int sb_flags_mask; /* Superblock flags that were changed */ |
| 89 | unsigned int lsm_flags; /* Information flags from the fs to the LSM */ | 106 | unsigned int lsm_flags; /* Information flags from the fs to the LSM */ |
| 90 | enum fs_context_purpose purpose:8; | 107 | enum fs_context_purpose purpose:8; |
| 108 | enum fs_context_phase phase:8; /* The phase the context is in */ | ||
| 91 | bool need_free:1; /* Need to call ops->free() */ | 109 | bool need_free:1; /* Need to call ops->free() */ |
| 92 | bool global:1; /* Goes into &init_user_ns */ | 110 | bool global:1; /* Goes into &init_user_ns */ |
| 93 | }; | 111 | }; |
| @@ -135,15 +153,21 @@ extern int vfs_get_super(struct fs_context *fc, | |||
| 135 | 153 | ||
| 136 | extern const struct file_operations fscontext_fops; | 154 | extern const struct file_operations fscontext_fops; |
| 137 | 155 | ||
| 138 | #ifdef CONFIG_PRINTK | 156 | /* |
| 157 | * Mount error, warning and informational message logging. This structure is | ||
| 158 | * shareable between a mount and a subordinate mount. | ||
| 159 | */ | ||
| 160 | struct fc_log { | ||
| 161 | refcount_t usage; | ||
| 162 | u8 head; /* Insertion index in buffer[] */ | ||
| 163 | u8 tail; /* Removal index in buffer[] */ | ||
| 164 | u8 need_free; /* Mask of kfree'able items in buffer[] */ | ||
| 165 | struct module *owner; /* Owner module for strings that don't then need freeing */ | ||
| 166 | char *buffer[8]; | ||
| 167 | }; | ||
| 168 | |||
| 139 | extern __attribute__((format(printf, 2, 3))) | 169 | extern __attribute__((format(printf, 2, 3))) |
| 140 | void logfc(struct fs_context *fc, const char *fmt, ...); | 170 | void logfc(struct fs_context *fc, const char *fmt, ...); |
| 141 | #else | ||
| 142 | static inline __attribute__((format(printf, 2, 3))) | ||
| 143 | void logfc(struct fs_context *fc, const char *fmt, ...) | ||
| 144 | { | ||
| 145 | } | ||
| 146 | #endif | ||
| 147 | 171 | ||
| 148 | /** | 172 | /** |
| 149 | * infof - Store supplementary informational message | 173 | * infof - Store supplementary informational message |
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index f7e55d0d2672..47f58cfb6a19 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h | |||
| @@ -159,6 +159,10 @@ | |||
| 159 | * Parse a string of security data filling in the opts structure | 159 | * Parse a string of security data filling in the opts structure |
| 160 | * @options string containing all mount options known by the LSM | 160 | * @options string containing all mount options known by the LSM |
| 161 | * @opts binary data structure usable by the LSM | 161 | * @opts binary data structure usable by the LSM |
| 162 | * @move_mount: | ||
| 163 | * Check permission before a mount is moved. | ||
| 164 | * @from_path indicates the mount that is going to be moved. | ||
| 165 | * @to_path indicates the mountpoint that will be mounted upon. | ||
| 162 | * @dentry_init_security: | 166 | * @dentry_init_security: |
| 163 | * Compute a context for a dentry as the inode is not yet available | 167 | * Compute a context for a dentry as the inode is not yet available |
| 164 | * since NFSv4 has no label backed by an EA anyway. | 168 | * since NFSv4 has no label backed by an EA anyway. |
| @@ -1502,6 +1506,7 @@ union security_list_options { | |||
| 1502 | unsigned long *set_kern_flags); | 1506 | unsigned long *set_kern_flags); |
| 1503 | int (*sb_add_mnt_opt)(const char *option, const char *val, int len, | 1507 | int (*sb_add_mnt_opt)(const char *option, const char *val, int len, |
| 1504 | void **mnt_opts); | 1508 | void **mnt_opts); |
| 1509 | int (*move_mount)(const struct path *from_path, const struct path *to_path); | ||
| 1505 | int (*dentry_init_security)(struct dentry *dentry, int mode, | 1510 | int (*dentry_init_security)(struct dentry *dentry, int mode, |
| 1506 | const struct qstr *name, void **ctx, | 1511 | const struct qstr *name, void **ctx, |
| 1507 | u32 *ctxlen); | 1512 | u32 *ctxlen); |
| @@ -1839,6 +1844,7 @@ struct security_hook_heads { | |||
| 1839 | struct hlist_head sb_set_mnt_opts; | 1844 | struct hlist_head sb_set_mnt_opts; |
| 1840 | struct hlist_head sb_clone_mnt_opts; | 1845 | struct hlist_head sb_clone_mnt_opts; |
| 1841 | struct hlist_head sb_add_mnt_opt; | 1846 | struct hlist_head sb_add_mnt_opt; |
| 1847 | struct hlist_head move_mount; | ||
| 1842 | struct hlist_head dentry_init_security; | 1848 | struct hlist_head dentry_init_security; |
| 1843 | struct hlist_head dentry_create_files_as; | 1849 | struct hlist_head dentry_create_files_as; |
| 1844 | #ifdef CONFIG_SECURITY_PATH | 1850 | #ifdef CONFIG_SECURITY_PATH |
diff --git a/include/linux/module.h b/include/linux/module.h index 5bf5dcd91009..7dc4dc79b634 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
| @@ -709,6 +709,12 @@ static inline bool is_module_text_address(unsigned long addr) | |||
| 709 | return false; | 709 | return false; |
| 710 | } | 710 | } |
| 711 | 711 | ||
| 712 | static inline bool within_module_core(unsigned long addr, | ||
| 713 | const struct module *mod) | ||
| 714 | { | ||
| 715 | return false; | ||
| 716 | } | ||
| 717 | |||
| 712 | /* Get/put a kernel symbol (calls should be symmetric) */ | 718 | /* Get/put a kernel symbol (calls should be symmetric) */ |
| 713 | #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) | 719 | #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) |
| 714 | #define symbol_put(x) do { } while (0) | 720 | #define symbol_put(x) do { } while (0) |
diff --git a/include/linux/security.h b/include/linux/security.h index d543293216b9..659071c2e57c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
| @@ -251,6 +251,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, | |||
| 251 | unsigned long *set_kern_flags); | 251 | unsigned long *set_kern_flags); |
| 252 | int security_add_mnt_opt(const char *option, const char *val, | 252 | int security_add_mnt_opt(const char *option, const char *val, |
| 253 | int len, void **mnt_opts); | 253 | int len, void **mnt_opts); |
| 254 | int security_move_mount(const struct path *from_path, const struct path *to_path); | ||
| 254 | int security_dentry_init_security(struct dentry *dentry, int mode, | 255 | int security_dentry_init_security(struct dentry *dentry, int mode, |
| 255 | const struct qstr *name, void **ctx, | 256 | const struct qstr *name, void **ctx, |
| 256 | u32 *ctxlen); | 257 | u32 *ctxlen); |
| @@ -614,6 +615,12 @@ static inline int security_add_mnt_opt(const char *option, const char *val, | |||
| 614 | return 0; | 615 | return 0; |
| 615 | } | 616 | } |
| 616 | 617 | ||
| 618 | static inline int security_move_mount(const struct path *from_path, | ||
| 619 | const struct path *to_path) | ||
| 620 | { | ||
| 621 | return 0; | ||
| 622 | } | ||
| 623 | |||
| 617 | static inline int security_inode_alloc(struct inode *inode) | 624 | static inline int security_inode_alloc(struct inode *inode) |
| 618 | { | 625 | { |
| 619 | return 0; | 626 | return 0; |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e446806a561f..e2870fe1be5b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
| @@ -985,6 +985,15 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, | |||
| 985 | unsigned mask, struct statx __user *buffer); | 985 | unsigned mask, struct statx __user *buffer); |
| 986 | asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, | 986 | asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, |
| 987 | int flags, uint32_t sig); | 987 | int flags, uint32_t sig); |
| 988 | asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); | ||
| 989 | asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, | ||
| 990 | int to_dfd, const char __user *to_path, | ||
| 991 | unsigned int ms_flags); | ||
| 992 | asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags); | ||
| 993 | asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key, | ||
| 994 | const void __user *value, int aux); | ||
| 995 | asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags); | ||
| 996 | asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags); | ||
| 988 | asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, | 997 | asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, |
| 989 | siginfo_t __user *info, | 998 | siginfo_t __user *info, |
| 990 | unsigned int flags); | 999 | unsigned int flags); |
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a2f8658f1c55..1d338357df8a 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h | |||
| @@ -91,5 +91,7 @@ | |||
| 91 | #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ | 91 | #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ |
| 92 | #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ | 92 | #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ |
| 93 | 93 | ||
| 94 | #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ | ||
| 95 | |||
| 94 | 96 | ||
| 95 | #endif /* _UAPI_LINUX_FCNTL_H */ | 97 | #endif /* _UAPI_LINUX_FCNTL_H */ |
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 3f9ec42510b0..96a0240f23fe 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h | |||
| @@ -55,4 +55,66 @@ | |||
| 55 | #define MS_MGC_VAL 0xC0ED0000 | 55 | #define MS_MGC_VAL 0xC0ED0000 |
| 56 | #define MS_MGC_MSK 0xffff0000 | 56 | #define MS_MGC_MSK 0xffff0000 |
| 57 | 57 | ||
| 58 | /* | ||
| 59 | * open_tree() flags. | ||
| 60 | */ | ||
| 61 | #define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ | ||
| 62 | #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ | ||
| 63 | |||
| 64 | /* | ||
| 65 | * move_mount() flags. | ||
| 66 | */ | ||
| 67 | #define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ | ||
| 68 | #define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ | ||
| 69 | #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ | ||
| 70 | #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ | ||
| 71 | #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ | ||
| 72 | #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ | ||
| 73 | #define MOVE_MOUNT__MASK 0x00000077 | ||
| 74 | |||
| 75 | /* | ||
| 76 | * fsopen() flags. | ||
| 77 | */ | ||
| 78 | #define FSOPEN_CLOEXEC 0x00000001 | ||
| 79 | |||
| 80 | /* | ||
| 81 | * fspick() flags. | ||
| 82 | */ | ||
| 83 | #define FSPICK_CLOEXEC 0x00000001 | ||
| 84 | #define FSPICK_SYMLINK_NOFOLLOW 0x00000002 | ||
| 85 | #define FSPICK_NO_AUTOMOUNT 0x00000004 | ||
| 86 | #define FSPICK_EMPTY_PATH 0x00000008 | ||
| 87 | |||
| 88 | /* | ||
| 89 | * The type of fsconfig() call made. | ||
| 90 | */ | ||
| 91 | enum fsconfig_command { | ||
| 92 | FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ | ||
| 93 | FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ | ||
| 94 | FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ | ||
| 95 | FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ | ||
| 96 | FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ | ||
| 97 | FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ | ||
| 98 | FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ | ||
| 99 | FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ | ||
| 100 | }; | ||
| 101 | |||
| 102 | /* | ||
| 103 | * fsmount() flags. | ||
| 104 | */ | ||
| 105 | #define FSMOUNT_CLOEXEC 0x00000001 | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Mount attributes. | ||
| 109 | */ | ||
| 110 | #define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ | ||
| 111 | #define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ | ||
| 112 | #define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ | ||
| 113 | #define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ | ||
| 114 | #define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ | ||
| 115 | #define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ | ||
| 116 | #define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ | ||
| 117 | #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ | ||
| 118 | #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ | ||
| 119 | |||
| 58 | #endif /* _UAPI_LINUX_MOUNT_H */ | 120 | #endif /* _UAPI_LINUX_MOUNT_H */ |
diff --git a/samples/Kconfig b/samples/Kconfig index d19754ccad08..30a89425009c 100644 --- a/samples/Kconfig +++ b/samples/Kconfig | |||
| @@ -154,10 +154,11 @@ config SAMPLE_ANDROID_BINDERFS | |||
| 154 | Builds a sample program to illustrate the use of the Android binderfs | 154 | Builds a sample program to illustrate the use of the Android binderfs |
| 155 | filesystem. | 155 | filesystem. |
| 156 | 156 | ||
| 157 | config SAMPLE_STATX | 157 | config SAMPLE_VFS |
| 158 | bool "Build example extended-stat using code" | 158 | bool "Build example programs that use new VFS system calls" |
| 159 | depends on BROKEN | ||
| 160 | help | 159 | help |
| 161 | Build example userspace program to use the new extended-stat syscall. | 160 | Build example userspace programs that use new VFS system calls such |
| 161 | as the mount API and statx(). Note that this is restricted to the x86 | ||
| 162 | arch because it uses system calls that aren't yet available on all arches. | ||
| 162 | 163 | ||
| 163 | endif # SAMPLES | 164 | endif # SAMPLES |
diff --git a/samples/Makefile b/samples/Makefile index fadadb1c3b05..2484cc262d3e 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
| @@ -3,4 +3,4 @@ | |||
| 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ | 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ |
| 4 | hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ | 4 | hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ |
| 5 | configfs/ connector/ v4l/ trace_printk/ \ | 5 | configfs/ connector/ v4l/ trace_printk/ \ |
| 6 | vfio-mdev/ statx/ qmi/ binderfs/ pidfd/ | 6 | vfio-mdev/ vfs/ qmi/ binderfs/ pidfd/ |
diff --git a/samples/statx/Makefile b/samples/vfs/Makefile index 59df7c25a9d1..4ac9690fb3c4 100644 --- a/samples/statx/Makefile +++ b/samples/vfs/Makefile | |||
| @@ -1,7 +1,10 @@ | |||
| 1 | # List of programs to build | 1 | # List of programs to build |
| 2 | hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx | 2 | hostprogs-$(CONFIG_SAMPLE_VFS) := \ |
| 3 | test-fsmount \ | ||
| 4 | test-statx | ||
| 3 | 5 | ||
| 4 | # Tell kbuild to always build the programs | 6 | # Tell kbuild to always build the programs |
| 5 | always := $(hostprogs-y) | 7 | always := $(hostprogs-y) |
| 6 | 8 | ||
| 9 | HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include | ||
| 7 | HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include | 10 | HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include |
diff --git a/samples/vfs/test-fsmount.c b/samples/vfs/test-fsmount.c new file mode 100644 index 000000000000..266d72b3dce4 --- /dev/null +++ b/samples/vfs/test-fsmount.c | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | /* fd-based mount test. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <stdio.h> | ||
| 13 | #include <stdlib.h> | ||
| 14 | #include <unistd.h> | ||
| 15 | #include <errno.h> | ||
| 16 | #include <fcntl.h> | ||
| 17 | #include <sys/prctl.h> | ||
| 18 | #include <sys/wait.h> | ||
| 19 | #include <linux/mount.h> | ||
| 20 | #include <linux/unistd.h> | ||
| 21 | |||
| 22 | #define E(x) do { if ((x) == -1) { perror(#x); exit(1); } } while(0) | ||
| 23 | |||
| 24 | static void check_messages(int fd) | ||
| 25 | { | ||
| 26 | char buf[4096]; | ||
| 27 | int err, n; | ||
| 28 | |||
| 29 | err = errno; | ||
| 30 | |||
| 31 | for (;;) { | ||
| 32 | n = read(fd, buf, sizeof(buf)); | ||
| 33 | if (n < 0) | ||
| 34 | break; | ||
| 35 | n -= 2; | ||
| 36 | |||
| 37 | switch (buf[0]) { | ||
| 38 | case 'e': | ||
| 39 | fprintf(stderr, "Error: %*.*s\n", n, n, buf + 2); | ||
| 40 | break; | ||
| 41 | case 'w': | ||
| 42 | fprintf(stderr, "Warning: %*.*s\n", n, n, buf + 2); | ||
| 43 | break; | ||
| 44 | case 'i': | ||
| 45 | fprintf(stderr, "Info: %*.*s\n", n, n, buf + 2); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | errno = err; | ||
| 51 | } | ||
| 52 | |||
| 53 | static __attribute__((noreturn)) | ||
| 54 | void mount_error(int fd, const char *s) | ||
| 55 | { | ||
| 56 | check_messages(fd); | ||
| 57 | fprintf(stderr, "%s: %m\n", s); | ||
| 58 | exit(1); | ||
| 59 | } | ||
| 60 | |||
| 61 | /* Hope -1 isn't a syscall */ | ||
| 62 | #ifndef __NR_fsopen | ||
| 63 | #define __NR_fsopen -1 | ||
| 64 | #endif | ||
| 65 | #ifndef __NR_fsmount | ||
| 66 | #define __NR_fsmount -1 | ||
| 67 | #endif | ||
| 68 | #ifndef __NR_fsconfig | ||
| 69 | #define __NR_fsconfig -1 | ||
| 70 | #endif | ||
| 71 | #ifndef __NR_move_mount | ||
| 72 | #define __NR_move_mount -1 | ||
| 73 | #endif | ||
| 74 | |||
| 75 | |||
| 76 | static inline int fsopen(const char *fs_name, unsigned int flags) | ||
| 77 | { | ||
| 78 | return syscall(__NR_fsopen, fs_name, flags); | ||
| 79 | } | ||
| 80 | |||
| 81 | static inline int fsmount(int fsfd, unsigned int flags, unsigned int ms_flags) | ||
| 82 | { | ||
| 83 | return syscall(__NR_fsmount, fsfd, flags, ms_flags); | ||
| 84 | } | ||
| 85 | |||
| 86 | static inline int fsconfig(int fsfd, unsigned int cmd, | ||
| 87 | const char *key, const void *val, int aux) | ||
| 88 | { | ||
| 89 | return syscall(__NR_fsconfig, fsfd, cmd, key, val, aux); | ||
| 90 | } | ||
| 91 | |||
| 92 | static inline int move_mount(int from_dfd, const char *from_pathname, | ||
| 93 | int to_dfd, const char *to_pathname, | ||
| 94 | unsigned int flags) | ||
| 95 | { | ||
| 96 | return syscall(__NR_move_mount, | ||
| 97 | from_dfd, from_pathname, | ||
| 98 | to_dfd, to_pathname, flags); | ||
| 99 | } | ||
| 100 | |||
| 101 | #define E_fsconfig(fd, cmd, key, val, aux) \ | ||
| 102 | do { \ | ||
| 103 | if (fsconfig(fd, cmd, key, val, aux) == -1) \ | ||
| 104 | mount_error(fd, key ?: "create"); \ | ||
| 105 | } while (0) | ||
| 106 | |||
| 107 | int main(int argc, char *argv[]) | ||
| 108 | { | ||
| 109 | int fsfd, mfd; | ||
| 110 | |||
| 111 | /* Mount a publicly available AFS filesystem */ | ||
| 112 | fsfd = fsopen("afs", 0); | ||
| 113 | if (fsfd == -1) { | ||
| 114 | perror("fsopen"); | ||
| 115 | exit(1); | ||
| 116 | } | ||
| 117 | |||
| 118 | E_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell.", 0); | ||
| 119 | E_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); | ||
| 120 | |||
| 121 | mfd = fsmount(fsfd, 0, MOUNT_ATTR_RDONLY); | ||
| 122 | if (mfd < 0) | ||
| 123 | mount_error(fsfd, "fsmount"); | ||
| 124 | E(close(fsfd)); | ||
| 125 | |||
| 126 | if (move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH) < 0) { | ||
| 127 | perror("move_mount"); | ||
| 128 | exit(1); | ||
| 129 | } | ||
| 130 | |||
| 131 | E(close(mfd)); | ||
| 132 | exit(0); | ||
| 133 | } | ||
diff --git a/samples/statx/test-statx.c b/samples/vfs/test-statx.c index d4d77b09412c..e91f918e84c4 100644 --- a/samples/statx/test-statx.c +++ b/samples/vfs/test-statx.c | |||
| @@ -25,13 +25,21 @@ | |||
| 25 | #include <sys/types.h> | 25 | #include <sys/types.h> |
| 26 | #include <linux/stat.h> | 26 | #include <linux/stat.h> |
| 27 | #include <linux/fcntl.h> | 27 | #include <linux/fcntl.h> |
| 28 | #define statx foo | ||
| 29 | #define statx_timestamp foo_timestamp | ||
| 28 | #include <sys/stat.h> | 30 | #include <sys/stat.h> |
| 31 | #undef statx | ||
| 32 | #undef statx_timestamp | ||
| 29 | 33 | ||
| 30 | #define AT_STATX_SYNC_TYPE 0x6000 | 34 | #define AT_STATX_SYNC_TYPE 0x6000 |
| 31 | #define AT_STATX_SYNC_AS_STAT 0x0000 | 35 | #define AT_STATX_SYNC_AS_STAT 0x0000 |
| 32 | #define AT_STATX_FORCE_SYNC 0x2000 | 36 | #define AT_STATX_FORCE_SYNC 0x2000 |
| 33 | #define AT_STATX_DONT_SYNC 0x4000 | 37 | #define AT_STATX_DONT_SYNC 0x4000 |
| 34 | 38 | ||
| 39 | #ifndef __NR_statx | ||
| 40 | #define __NR_statx -1 | ||
| 41 | #endif | ||
| 42 | |||
| 35 | static __attribute__((unused)) | 43 | static __attribute__((unused)) |
| 36 | ssize_t statx(int dfd, const char *filename, unsigned flags, | 44 | ssize_t statx(int dfd, const char *filename, unsigned flags, |
| 37 | unsigned int mask, struct statx *buffer) | 45 | unsigned int mask, struct statx *buffer) |
| @@ -157,7 +165,8 @@ static void dump_statx(struct statx *stx) | |||
| 157 | "?dai?c??" /* 7- 0 0x00000000-000000ff */ | 165 | "?dai?c??" /* 7- 0 0x00000000-000000ff */ |
| 158 | ; | 166 | ; |
| 159 | 167 | ||
| 160 | printf("Attributes: %016llx (", stx->stx_attributes); | 168 | printf("Attributes: %016llx (", |
| 169 | (unsigned long long)stx->stx_attributes); | ||
| 161 | for (byte = 64 - 8; byte >= 0; byte -= 8) { | 170 | for (byte = 64 - 8; byte >= 0; byte -= 8) { |
| 162 | bits = stx->stx_attributes >> byte; | 171 | bits = stx->stx_attributes >> byte; |
| 163 | mbits = stx->stx_attributes_mask >> byte; | 172 | mbits = stx->stx_attributes_mask >> byte; |
diff --git a/security/security.c b/security/security.c index 8d6ef9da94eb..613a5c00e602 100644 --- a/security/security.c +++ b/security/security.c | |||
| @@ -866,6 +866,11 @@ int security_add_mnt_opt(const char *option, const char *val, int len, | |||
| 866 | } | 866 | } |
| 867 | EXPORT_SYMBOL(security_add_mnt_opt); | 867 | EXPORT_SYMBOL(security_add_mnt_opt); |
| 868 | 868 | ||
| 869 | int security_move_mount(const struct path *from_path, const struct path *to_path) | ||
| 870 | { | ||
| 871 | return call_int_hook(move_mount, 0, from_path, to_path); | ||
| 872 | } | ||
| 873 | |||
| 869 | int security_inode_alloc(struct inode *inode) | 874 | int security_inode_alloc(struct inode *inode) |
| 870 | { | 875 | { |
| 871 | int rc = lsm_inode_alloc(inode); | 876 | int rc = lsm_inode_alloc(inode); |
