diff options
author | David Drysdale <drysdale@google.com> | 2014-12-12 19:57:29 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 15:42:51 -0500 |
commit | 51f39a1f0cea1cacf8c787f652f26dfee9611874 (patch) | |
tree | 4b9199e785bdd9e8c0c55a0ec94ce8d268885bc5 /fs | |
parent | c0ef0cc9d277f0f2a83b5a287a816b3916d9f026 (diff) |
syscalls: implement execveat() system call
This patchset adds execveat(2) for x86, and is derived from Meredydd
Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528).
The primary aim of adding an execveat syscall is to allow an
implementation of fexecve(3) that does not rely on the /proc filesystem,
at least for executables (rather than scripts). The current glibc version
of fexecve(3) is implemented via /proc, which causes problems in sandboxed
or otherwise restricted environments.
Given the desire for a /proc-free fexecve() implementation, HPA suggested
(https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be
an appropriate generalization.
Also, having a new syscall means that it can take a flags argument without
backward-compatibility concerns. The current implementation just defines
the AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be
added in the future -- for example, flags for new namespaces (as suggested
at https://lkml.org/lkml/2006/7/11/474).
Related history:
- https://lkml.org/lkml/2006/12/27/123 is an example of someone
realizing that fexecve() is likely to fail in a chroot environment.
- http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered
documenting the /proc requirement of fexecve(3) in its manpage, to
"prevent other people from wasting their time".
- https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a
problem where a process that did setuid() could not fexecve()
because it no longer had access to /proc/self/fd; this has since
been fixed.
This patch (of 4):
Add a new execveat(2) system call. execveat() is to execve() as openat()
is to open(): it takes a file descriptor that refers to a directory, and
resolves the filename relative to that.
In addition, if the filename is empty and AT_EMPTY_PATH is specified,
execveat() executes the file to which the file descriptor refers. This
replicates the functionality of fexecve(), which is a system call in other
UNIXen; on Linux, glibc instead implements it by opening
"/proc/self/fd/<fd>" (and so relies on /proc being mounted).
The filename fed to the executed program as argv[0] (or the name of the
script fed to a script interpreter) will be of the form "/dev/fd/<fd>"
(for an empty filename) or "/dev/fd/<fd>/<filename>", effectively
reflecting how the executable was found. This does, however, mean that
execution of a script in a /proc-less environment won't work (the
interpreter has to reopen the script through /dev/fd, which is normally a
symlink to /proc/self/fd); also, script execution via an O_CLOEXEC file
descriptor fails (as the file will not be accessible after exec).
Based on patches by Meredydd Luff.
Signed-off-by: David Drysdale <drysdale@google.com>
Cc: Meredydd Luff <meredydd@senatehouse.org>
Cc: Shuah Khan <shuah.kh@samsung.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Rich Felker <dalias@aerifal.cx>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_em86.c | 4 | ||||
-rw-r--r-- | fs/binfmt_misc.c | 4 | ||||
-rw-r--r-- | fs/binfmt_script.c | 10 | ||||
-rw-r--r-- | fs/exec.c | 113 | ||||
-rw-r--r-- | fs/namei.c | 2 |
5 files changed, 119 insertions, 14 deletions
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f37b08cea1f7..490538536cb4 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c | |||
@@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm) | |||
42 | return -ENOEXEC; | 42 | return -ENOEXEC; |
43 | } | 43 | } |
44 | 44 | ||
45 | /* Need to be able to load the file after exec */ | ||
46 | if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) | ||
47 | return -ENOENT; | ||
48 | |||
45 | allow_write_access(bprm->file); | 49 | allow_write_access(bprm->file); |
46 | fput(bprm->file); | 50 | fput(bprm->file); |
47 | bprm->file = NULL; | 51 | bprm->file = NULL; |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 70789e198dea..c04ef1d4f18a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -144,6 +144,10 @@ static int load_misc_binary(struct linux_binprm *bprm) | |||
144 | if (!fmt) | 144 | if (!fmt) |
145 | goto ret; | 145 | goto ret; |
146 | 146 | ||
147 | /* Need to be able to load the file after exec */ | ||
148 | if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) | ||
149 | return -ENOENT; | ||
150 | |||
147 | if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { | 151 | if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { |
148 | retval = remove_arg_zero(bprm); | 152 | retval = remove_arg_zero(bprm); |
149 | if (retval) | 153 | if (retval) |
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 5027a3e14922..afdf4e3cafc2 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c | |||
@@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm) | |||
24 | 24 | ||
25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) | 25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) |
26 | return -ENOEXEC; | 26 | return -ENOEXEC; |
27 | |||
28 | /* | ||
29 | * If the script filename will be inaccessible after exec, typically | ||
30 | * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give | ||
31 | * up now (on the assumption that the interpreter will want to load | ||
32 | * this file). | ||
33 | */ | ||
34 | if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) | ||
35 | return -ENOENT; | ||
36 | |||
27 | /* | 37 | /* |
28 | * This section does the #! interpretation. | 38 | * This section does the #! interpretation. |
29 | * Sorta complicated, but hopefully it will work. -TYT | 39 | * Sorta complicated, but hopefully it will work. -TYT |
@@ -748,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages); | |||
748 | 748 | ||
749 | #endif /* CONFIG_MMU */ | 749 | #endif /* CONFIG_MMU */ |
750 | 750 | ||
751 | static struct file *do_open_exec(struct filename *name) | 751 | static struct file *do_open_execat(int fd, struct filename *name, int flags) |
752 | { | 752 | { |
753 | struct file *file; | 753 | struct file *file; |
754 | int err; | 754 | int err; |
755 | static const struct open_flags open_exec_flags = { | 755 | struct open_flags open_exec_flags = { |
756 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 756 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
757 | .acc_mode = MAY_EXEC | MAY_OPEN, | 757 | .acc_mode = MAY_EXEC | MAY_OPEN, |
758 | .intent = LOOKUP_OPEN, | 758 | .intent = LOOKUP_OPEN, |
759 | .lookup_flags = LOOKUP_FOLLOW, | 759 | .lookup_flags = LOOKUP_FOLLOW, |
760 | }; | 760 | }; |
761 | 761 | ||
762 | file = do_filp_open(AT_FDCWD, name, &open_exec_flags); | 762 | if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) |
763 | return ERR_PTR(-EINVAL); | ||
764 | if (flags & AT_SYMLINK_NOFOLLOW) | ||
765 | open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; | ||
766 | if (flags & AT_EMPTY_PATH) | ||
767 | open_exec_flags.lookup_flags |= LOOKUP_EMPTY; | ||
768 | |||
769 | file = do_filp_open(fd, name, &open_exec_flags); | ||
763 | if (IS_ERR(file)) | 770 | if (IS_ERR(file)) |
764 | goto out; | 771 | goto out; |
765 | 772 | ||
@@ -770,12 +777,13 @@ static struct file *do_open_exec(struct filename *name) | |||
770 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 777 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
771 | goto exit; | 778 | goto exit; |
772 | 779 | ||
773 | fsnotify_open(file); | ||
774 | |||
775 | err = deny_write_access(file); | 780 | err = deny_write_access(file); |
776 | if (err) | 781 | if (err) |
777 | goto exit; | 782 | goto exit; |
778 | 783 | ||
784 | if (name->name[0] != '\0') | ||
785 | fsnotify_open(file); | ||
786 | |||
779 | out: | 787 | out: |
780 | return file; | 788 | return file; |
781 | 789 | ||
@@ -787,7 +795,7 @@ exit: | |||
787 | struct file *open_exec(const char *name) | 795 | struct file *open_exec(const char *name) |
788 | { | 796 | { |
789 | struct filename tmp = { .name = name }; | 797 | struct filename tmp = { .name = name }; |
790 | return do_open_exec(&tmp); | 798 | return do_open_execat(AT_FDCWD, &tmp, 0); |
791 | } | 799 | } |
792 | EXPORT_SYMBOL(open_exec); | 800 | EXPORT_SYMBOL(open_exec); |
793 | 801 | ||
@@ -1428,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm) | |||
1428 | /* | 1436 | /* |
1429 | * sys_execve() executes a new program. | 1437 | * sys_execve() executes a new program. |
1430 | */ | 1438 | */ |
1431 | static int do_execve_common(struct filename *filename, | 1439 | static int do_execveat_common(int fd, struct filename *filename, |
1432 | struct user_arg_ptr argv, | 1440 | struct user_arg_ptr argv, |
1433 | struct user_arg_ptr envp) | 1441 | struct user_arg_ptr envp, |
1442 | int flags) | ||
1434 | { | 1443 | { |
1444 | char *pathbuf = NULL; | ||
1435 | struct linux_binprm *bprm; | 1445 | struct linux_binprm *bprm; |
1436 | struct file *file; | 1446 | struct file *file; |
1437 | struct files_struct *displaced; | 1447 | struct files_struct *displaced; |
@@ -1472,7 +1482,7 @@ static int do_execve_common(struct filename *filename, | |||
1472 | check_unsafe_exec(bprm); | 1482 | check_unsafe_exec(bprm); |
1473 | current->in_execve = 1; | 1483 | current->in_execve = 1; |
1474 | 1484 | ||
1475 | file = do_open_exec(filename); | 1485 | file = do_open_execat(fd, filename, flags); |
1476 | retval = PTR_ERR(file); | 1486 | retval = PTR_ERR(file); |
1477 | if (IS_ERR(file)) | 1487 | if (IS_ERR(file)) |
1478 | goto out_unmark; | 1488 | goto out_unmark; |
@@ -1480,7 +1490,28 @@ static int do_execve_common(struct filename *filename, | |||
1480 | sched_exec(); | 1490 | sched_exec(); |
1481 | 1491 | ||
1482 | bprm->file = file; | 1492 | bprm->file = file; |
1483 | bprm->filename = bprm->interp = filename->name; | 1493 | if (fd == AT_FDCWD || filename->name[0] == '/') { |
1494 | bprm->filename = filename->name; | ||
1495 | } else { | ||
1496 | if (filename->name[0] == '\0') | ||
1497 | pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd); | ||
1498 | else | ||
1499 | pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s", | ||
1500 | fd, filename->name); | ||
1501 | if (!pathbuf) { | ||
1502 | retval = -ENOMEM; | ||
1503 | goto out_unmark; | ||
1504 | } | ||
1505 | /* | ||
1506 | * Record that a name derived from an O_CLOEXEC fd will be | ||
1507 | * inaccessible after exec. Relies on having exclusive access to | ||
1508 | * current->files (due to unshare_files above). | ||
1509 | */ | ||
1510 | if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) | ||
1511 | bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; | ||
1512 | bprm->filename = pathbuf; | ||
1513 | } | ||
1514 | bprm->interp = bprm->filename; | ||
1484 | 1515 | ||
1485 | retval = bprm_mm_init(bprm); | 1516 | retval = bprm_mm_init(bprm); |
1486 | if (retval) | 1517 | if (retval) |
@@ -1521,6 +1552,7 @@ static int do_execve_common(struct filename *filename, | |||
1521 | acct_update_integrals(current); | 1552 | acct_update_integrals(current); |
1522 | task_numa_free(current); | 1553 | task_numa_free(current); |
1523 | free_bprm(bprm); | 1554 | free_bprm(bprm); |
1555 | kfree(pathbuf); | ||
1524 | putname(filename); | 1556 | putname(filename); |
1525 | if (displaced) | 1557 | if (displaced) |
1526 | put_files_struct(displaced); | 1558 | put_files_struct(displaced); |
@@ -1538,6 +1570,7 @@ out_unmark: | |||
1538 | 1570 | ||
1539 | out_free: | 1571 | out_free: |
1540 | free_bprm(bprm); | 1572 | free_bprm(bprm); |
1573 | kfree(pathbuf); | ||
1541 | 1574 | ||
1542 | out_files: | 1575 | out_files: |
1543 | if (displaced) | 1576 | if (displaced) |
@@ -1553,7 +1586,18 @@ int do_execve(struct filename *filename, | |||
1553 | { | 1586 | { |
1554 | struct user_arg_ptr argv = { .ptr.native = __argv }; | 1587 | struct user_arg_ptr argv = { .ptr.native = __argv }; |
1555 | struct user_arg_ptr envp = { .ptr.native = __envp }; | 1588 | struct user_arg_ptr envp = { .ptr.native = __envp }; |
1556 | return do_execve_common(filename, argv, envp); | 1589 | return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); |
1590 | } | ||
1591 | |||
1592 | int do_execveat(int fd, struct filename *filename, | ||
1593 | const char __user *const __user *__argv, | ||
1594 | const char __user *const __user *__envp, | ||
1595 | int flags) | ||
1596 | { | ||
1597 | struct user_arg_ptr argv = { .ptr.native = __argv }; | ||
1598 | struct user_arg_ptr envp = { .ptr.native = __envp }; | ||
1599 | |||
1600 | return do_execveat_common(fd, filename, argv, envp, flags); | ||
1557 | } | 1601 | } |
1558 | 1602 | ||
1559 | #ifdef CONFIG_COMPAT | 1603 | #ifdef CONFIG_COMPAT |
@@ -1569,7 +1613,23 @@ static int compat_do_execve(struct filename *filename, | |||
1569 | .is_compat = true, | 1613 | .is_compat = true, |
1570 | .ptr.compat = __envp, | 1614 | .ptr.compat = __envp, |
1571 | }; | 1615 | }; |
1572 | return do_execve_common(filename, argv, envp); | 1616 | return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); |
1617 | } | ||
1618 | |||
1619 | static int compat_do_execveat(int fd, struct filename *filename, | ||
1620 | const compat_uptr_t __user *__argv, | ||
1621 | const compat_uptr_t __user *__envp, | ||
1622 | int flags) | ||
1623 | { | ||
1624 | struct user_arg_ptr argv = { | ||
1625 | .is_compat = true, | ||
1626 | .ptr.compat = __argv, | ||
1627 | }; | ||
1628 | struct user_arg_ptr envp = { | ||
1629 | .is_compat = true, | ||
1630 | .ptr.compat = __envp, | ||
1631 | }; | ||
1632 | return do_execveat_common(fd, filename, argv, envp, flags); | ||
1573 | } | 1633 | } |
1574 | #endif | 1634 | #endif |
1575 | 1635 | ||
@@ -1609,6 +1669,20 @@ SYSCALL_DEFINE3(execve, | |||
1609 | { | 1669 | { |
1610 | return do_execve(getname(filename), argv, envp); | 1670 | return do_execve(getname(filename), argv, envp); |
1611 | } | 1671 | } |
1672 | |||
1673 | SYSCALL_DEFINE5(execveat, | ||
1674 | int, fd, const char __user *, filename, | ||
1675 | const char __user *const __user *, argv, | ||
1676 | const char __user *const __user *, envp, | ||
1677 | int, flags) | ||
1678 | { | ||
1679 | int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; | ||
1680 | |||
1681 | return do_execveat(fd, | ||
1682 | getname_flags(filename, lookup_flags, NULL), | ||
1683 | argv, envp, flags); | ||
1684 | } | ||
1685 | |||
1612 | #ifdef CONFIG_COMPAT | 1686 | #ifdef CONFIG_COMPAT |
1613 | COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, | 1687 | COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, |
1614 | const compat_uptr_t __user *, argv, | 1688 | const compat_uptr_t __user *, argv, |
@@ -1616,4 +1690,17 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, | |||
1616 | { | 1690 | { |
1617 | return compat_do_execve(getname(filename), argv, envp); | 1691 | return compat_do_execve(getname(filename), argv, envp); |
1618 | } | 1692 | } |
1693 | |||
1694 | COMPAT_SYSCALL_DEFINE5(execveat, int, fd, | ||
1695 | const char __user *, filename, | ||
1696 | const compat_uptr_t __user *, argv, | ||
1697 | const compat_uptr_t __user *, envp, | ||
1698 | int, flags) | ||
1699 | { | ||
1700 | int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; | ||
1701 | |||
1702 | return compat_do_execveat(fd, | ||
1703 | getname_flags(filename, lookup_flags, NULL), | ||
1704 | argv, envp, flags); | ||
1705 | } | ||
1619 | #endif | 1706 | #endif |
diff --git a/fs/namei.c b/fs/namei.c index db5fe86319e6..ca814165d84c 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -130,7 +130,7 @@ void final_putname(struct filename *name) | |||
130 | 130 | ||
131 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) | 131 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) |
132 | 132 | ||
133 | static struct filename * | 133 | struct filename * |
134 | getname_flags(const char __user *filename, int flags, int *empty) | 134 | getname_flags(const char __user *filename, int flags, int *empty) |
135 | { | 135 | { |
136 | struct filename *result, *err; | 136 | struct filename *result, *err; |