aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author David Drysdale <drysdale@google.com> 2014-12-12 19:57:29 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-12-13 15:42:51 -0500
commit 51f39a1f0cea1cacf8c787f652f26dfee9611874 (patch)
tree 4b9199e785bdd9e8c0c55a0ec94ce8d268885bc5 /fs
parent c0ef0cc9d277f0f2a83b5a287a816b3916d9f026 (diff)
syscalls: implement execveat() system call
This patchset adds execveat(2) for x86, and is derived from Meredydd Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528).

The primary aim of adding an execveat syscall is to allow an implementation of fexecve(3) that does not rely on the /proc filesystem, at least for executables (rather than scripts). The current glibc version of fexecve(3) is implemented via /proc, which causes problems in sandboxed or otherwise restricted environments.

Given the desire for a /proc-free fexecve() implementation, HPA suggested (https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be an appropriate generalization.

Also, having a new syscall means that it can take a flags argument without back-compatibility concerns. The current implementation just defines the AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be added in future -- for example, flags for new namespaces (as suggested at https://lkml.org/lkml/2006/7/11/474).

Related history:
 - https://lkml.org/lkml/2006/12/27/123 is an example of someone realizing that fexecve() is likely to fail in a chroot environment.
 - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered documenting the /proc requirement of fexecve(3) in its manpage, to "prevent other people from wasting their time".
 - https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a problem where a process that did setuid() could not fexecve() because it no longer had access to /proc/self/fd; this has since been fixed.

This patch (of 4):

Add a new execveat(2) system call. execveat() is to execve() as openat() is to open(): it takes a file descriptor that refers to a directory, and resolves the filename relative to that.

In addition, if the filename is empty and AT_EMPTY_PATH is specified, execveat() executes the file to which the file descriptor refers.
This replicates the functionality of fexecve(), which is a system call in other UNIXen, but in Linux glibc it depends on opening "/proc/self/fd/<fd>" (and so relies on /proc being mounted).

The filename fed to the executed program as argv[0] (or the name of the script fed to a script interpreter) will be of the form "/dev/fd/<fd>" (for an empty filename) or "/dev/fd/<fd>/<filename>", effectively reflecting how the executable was found. This does however mean that execution of a script in a /proc-less environment won't work; also, script execution via an O_CLOEXEC file descriptor fails (as the file will not be accessible after exec).

Based on patches by Meredydd Luff.

Signed-off-by: David Drysdale <drysdale@google.com>
Cc: Meredydd Luff <meredydd@senatehouse.org>
Cc: Shuah Khan <shuah.kh@samsung.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Rich Felker <dalias@aerifal.cx>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/binfmt_em86.c 4
-rw-r--r-- fs/binfmt_misc.c 4
-rw-r--r-- fs/binfmt_script.c 10
-rw-r--r-- fs/exec.c 113
-rw-r--r-- fs/namei.c 2
5 files changed, 119 insertions, 14 deletions
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f37b08cea1f7..490538536cb4 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm)
42 return -ENOEXEC; 42 return -ENOEXEC;
43 } 43 }
44 44
45 /* Need to be able to load the file after exec */
46 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
47 return -ENOENT;
48
45 allow_write_access(bprm->file); 49 allow_write_access(bprm->file);
46 fput(bprm->file); 50 fput(bprm->file);
47 bprm->file = NULL; 51 bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 70789e198dea..c04ef1d4f18a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -144,6 +144,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
144 if (!fmt) 144 if (!fmt)
145 goto ret; 145 goto ret;
146 146
147 /* Need to be able to load the file after exec */
148 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
149 return -ENOENT;
150
147 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { 151 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
148 retval = remove_arg_zero(bprm); 152 retval = remove_arg_zero(bprm);
149 if (retval) 153 if (retval)
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 5027a3e14922..afdf4e3cafc2 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm)
24 24
25 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) 25 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
26 return -ENOEXEC; 26 return -ENOEXEC;
27
28 /*
29 * If the script filename will be inaccessible after exec, typically
30 * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give
31 * up now (on the assumption that the interpreter will want to load
32 * this file).
33 */
34 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
35 return -ENOENT;
36
27 /* 37 /*
28 * This section does the #! interpretation. 38 * This section does the #! interpretation.
29 * Sorta complicated, but hopefully it will work. -TYT 39 * Sorta complicated, but hopefully it will work. -TYT
diff --git a/fs/exec.c b/fs/exec.c
index 01aebe300200..ad8798e26be9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -748,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages);
748 748
749#endif /* CONFIG_MMU */ 749#endif /* CONFIG_MMU */
750 750
751static struct file *do_open_exec(struct filename *name) 751static struct file *do_open_execat(int fd, struct filename *name, int flags)
752{ 752{
753 struct file *file; 753 struct file *file;
754 int err; 754 int err;
755 static const struct open_flags open_exec_flags = { 755 struct open_flags open_exec_flags = {
756 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 756 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
757 .acc_mode = MAY_EXEC | MAY_OPEN, 757 .acc_mode = MAY_EXEC | MAY_OPEN,
758 .intent = LOOKUP_OPEN, 758 .intent = LOOKUP_OPEN,
759 .lookup_flags = LOOKUP_FOLLOW, 759 .lookup_flags = LOOKUP_FOLLOW,
760 }; 760 };
761 761
762 file = do_filp_open(AT_FDCWD, name, &open_exec_flags); 762 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
763 return ERR_PTR(-EINVAL);
764 if (flags & AT_SYMLINK_NOFOLLOW)
765 open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
766 if (flags & AT_EMPTY_PATH)
767 open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
768
769 file = do_filp_open(fd, name, &open_exec_flags);
763 if (IS_ERR(file)) 770 if (IS_ERR(file))
764 goto out; 771 goto out;
765 772
@@ -770,12 +777,13 @@ static struct file *do_open_exec(struct filename *name)
770 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) 777 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
771 goto exit; 778 goto exit;
772 779
773 fsnotify_open(file);
774
775 err = deny_write_access(file); 780 err = deny_write_access(file);
776 if (err) 781 if (err)
777 goto exit; 782 goto exit;
778 783
784 if (name->name[0] != '\0')
785 fsnotify_open(file);
786
779out: 787out:
780 return file; 788 return file;
781 789
@@ -787,7 +795,7 @@ exit:
787struct file *open_exec(const char *name) 795struct file *open_exec(const char *name)
788{ 796{
789 struct filename tmp = { .name = name }; 797 struct filename tmp = { .name = name };
790 return do_open_exec(&tmp); 798 return do_open_execat(AT_FDCWD, &tmp, 0);
791} 799}
792EXPORT_SYMBOL(open_exec); 800EXPORT_SYMBOL(open_exec);
793 801
@@ -1428,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm)
1428/* 1436/*
1429 * sys_execve() executes a new program. 1437 * sys_execve() executes a new program.
1430 */ 1438 */
1431static int do_execve_common(struct filename *filename, 1439static int do_execveat_common(int fd, struct filename *filename,
1432 struct user_arg_ptr argv, 1440 struct user_arg_ptr argv,
1433 struct user_arg_ptr envp) 1441 struct user_arg_ptr envp,
1442 int flags)
1434{ 1443{
1444 char *pathbuf = NULL;
1435 struct linux_binprm *bprm; 1445 struct linux_binprm *bprm;
1436 struct file *file; 1446 struct file *file;
1437 struct files_struct *displaced; 1447 struct files_struct *displaced;
@@ -1472,7 +1482,7 @@ static int do_execve_common(struct filename *filename,
1472 check_unsafe_exec(bprm); 1482 check_unsafe_exec(bprm);
1473 current->in_execve = 1; 1483 current->in_execve = 1;
1474 1484
1475 file = do_open_exec(filename); 1485 file = do_open_execat(fd, filename, flags);
1476 retval = PTR_ERR(file); 1486 retval = PTR_ERR(file);
1477 if (IS_ERR(file)) 1487 if (IS_ERR(file))
1478 goto out_unmark; 1488 goto out_unmark;
@@ -1480,7 +1490,28 @@ static int do_execve_common(struct filename *filename,
1480 sched_exec(); 1490 sched_exec();
1481 1491
1482 bprm->file = file; 1492 bprm->file = file;
1483 bprm->filename = bprm->interp = filename->name; 1493 if (fd == AT_FDCWD || filename->name[0] == '/') {
1494 bprm->filename = filename->name;
1495 } else {
1496 if (filename->name[0] == '\0')
1497 pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd);
1498 else
1499 pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s",
1500 fd, filename->name);
1501 if (!pathbuf) {
1502 retval = -ENOMEM;
1503 goto out_unmark;
1504 }
1505 /*
1506 * Record that a name derived from an O_CLOEXEC fd will be
1507 * inaccessible after exec. Relies on having exclusive access to
1508 * current->files (due to unshare_files above).
1509 */
1510 if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
1511 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1512 bprm->filename = pathbuf;
1513 }
1514 bprm->interp = bprm->filename;
1484 1515
1485 retval = bprm_mm_init(bprm); 1516 retval = bprm_mm_init(bprm);
1486 if (retval) 1517 if (retval)
@@ -1521,6 +1552,7 @@ static int do_execve_common(struct filename *filename,
1521 acct_update_integrals(current); 1552 acct_update_integrals(current);
1522 task_numa_free(current); 1553 task_numa_free(current);
1523 free_bprm(bprm); 1554 free_bprm(bprm);
1555 kfree(pathbuf);
1524 putname(filename); 1556 putname(filename);
1525 if (displaced) 1557 if (displaced)
1526 put_files_struct(displaced); 1558 put_files_struct(displaced);
@@ -1538,6 +1570,7 @@ out_unmark:
1538 1570
1539out_free: 1571out_free:
1540 free_bprm(bprm); 1572 free_bprm(bprm);
1573 kfree(pathbuf);
1541 1574
1542out_files: 1575out_files:
1543 if (displaced) 1576 if (displaced)
@@ -1553,7 +1586,18 @@ int do_execve(struct filename *filename,
1553{ 1586{
1554 struct user_arg_ptr argv = { .ptr.native = __argv }; 1587 struct user_arg_ptr argv = { .ptr.native = __argv };
1555 struct user_arg_ptr envp = { .ptr.native = __envp }; 1588 struct user_arg_ptr envp = { .ptr.native = __envp };
1556 return do_execve_common(filename, argv, envp); 1589 return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
1590}
1591
1592int do_execveat(int fd, struct filename *filename,
1593 const char __user *const __user *__argv,
1594 const char __user *const __user *__envp,
1595 int flags)
1596{
1597 struct user_arg_ptr argv = { .ptr.native = __argv };
1598 struct user_arg_ptr envp = { .ptr.native = __envp };
1599
1600 return do_execveat_common(fd, filename, argv, envp, flags);
1557} 1601}
1558 1602
1559#ifdef CONFIG_COMPAT 1603#ifdef CONFIG_COMPAT
@@ -1569,7 +1613,23 @@ static int compat_do_execve(struct filename *filename,
1569 .is_compat = true, 1613 .is_compat = true,
1570 .ptr.compat = __envp, 1614 .ptr.compat = __envp,
1571 }; 1615 };
1572 return do_execve_common(filename, argv, envp); 1616 return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
1617}
1618
1619static int compat_do_execveat(int fd, struct filename *filename,
1620 const compat_uptr_t __user *__argv,
1621 const compat_uptr_t __user *__envp,
1622 int flags)
1623{
1624 struct user_arg_ptr argv = {
1625 .is_compat = true,
1626 .ptr.compat = __argv,
1627 };
1628 struct user_arg_ptr envp = {
1629 .is_compat = true,
1630 .ptr.compat = __envp,
1631 };
1632 return do_execveat_common(fd, filename, argv, envp, flags);
1573} 1633}
1574#endif 1634#endif
1575 1635
@@ -1609,6 +1669,20 @@ SYSCALL_DEFINE3(execve,
1609{ 1669{
1610 return do_execve(getname(filename), argv, envp); 1670 return do_execve(getname(filename), argv, envp);
1611} 1671}
1672
1673SYSCALL_DEFINE5(execveat,
1674 int, fd, const char __user *, filename,
1675 const char __user *const __user *, argv,
1676 const char __user *const __user *, envp,
1677 int, flags)
1678{
1679 int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
1680
1681 return do_execveat(fd,
1682 getname_flags(filename, lookup_flags, NULL),
1683 argv, envp, flags);
1684}
1685
1612#ifdef CONFIG_COMPAT 1686#ifdef CONFIG_COMPAT
1613COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, 1687COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
1614 const compat_uptr_t __user *, argv, 1688 const compat_uptr_t __user *, argv,
@@ -1616,4 +1690,17 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
1616{ 1690{
1617 return compat_do_execve(getname(filename), argv, envp); 1691 return compat_do_execve(getname(filename), argv, envp);
1618} 1692}
1693
1694COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
1695 const char __user *, filename,
1696 const compat_uptr_t __user *, argv,
1697 const compat_uptr_t __user *, envp,
1698 int, flags)
1699{
1700 int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
1701
1702 return compat_do_execveat(fd,
1703 getname_flags(filename, lookup_flags, NULL),
1704 argv, envp, flags);
1705}
1619#endif 1706#endif
diff --git a/fs/namei.c b/fs/namei.c
index db5fe86319e6..ca814165d84c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -130,7 +130,7 @@ void final_putname(struct filename *name)
130 130
131#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) 131#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename))
132 132
133static struct filename * 133struct filename *
134getname_flags(const char __user *filename, int flags, int *empty) 134getname_flags(const char __user *filename, int flags, int *empty)
135{ 135{
136 struct filename *result, *err; 136 struct filename *result, *err;