diff options
Diffstat (limited to 'fs/exec.c')
-rw-r--r-- | fs/exec.c | 707 |
1 files changed, 23 insertions, 684 deletions
@@ -62,22 +62,12 @@ | |||
62 | 62 | ||
63 | #include <trace/events/task.h> | 63 | #include <trace/events/task.h> |
64 | #include "internal.h" | 64 | #include "internal.h" |
65 | #include "coredump.h" | ||
65 | 66 | ||
66 | #include <trace/events/sched.h> | 67 | #include <trace/events/sched.h> |
67 | 68 | ||
68 | int core_uses_pid; | ||
69 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
70 | unsigned int core_pipe_limit; | ||
71 | int suid_dumpable = 0; | 69 | int suid_dumpable = 0; |
72 | 70 | ||
73 | struct core_name { | ||
74 | char *corename; | ||
75 | int used, size; | ||
76 | }; | ||
77 | static atomic_t call_count = ATOMIC_INIT(1); | ||
78 | |||
79 | /* The maximal length of core_pattern is also specified in sysctl.c */ | ||
80 | |||
81 | static LIST_HEAD(formats); | 71 | static LIST_HEAD(formats); |
82 | static DEFINE_RWLOCK(binfmt_lock); | 72 | static DEFINE_RWLOCK(binfmt_lock); |
83 | 73 | ||
@@ -612,7 +602,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
612 | * process cleanup to remove whatever mess we made. | 602 | * process cleanup to remove whatever mess we made. |
613 | */ | 603 | */ |
614 | if (length != move_page_tables(vma, old_start, | 604 | if (length != move_page_tables(vma, old_start, |
615 | vma, new_start, length)) | 605 | vma, new_start, length, false)) |
616 | return -ENOMEM; | 606 | return -ENOMEM; |
617 | 607 | ||
618 | lru_add_drain(); | 608 | lru_add_drain(); |
@@ -887,9 +877,11 @@ static int de_thread(struct task_struct *tsk) | |||
887 | sig->notify_count--; | 877 | sig->notify_count--; |
888 | 878 | ||
889 | while (sig->notify_count) { | 879 | while (sig->notify_count) { |
890 | __set_current_state(TASK_UNINTERRUPTIBLE); | 880 | __set_current_state(TASK_KILLABLE); |
891 | spin_unlock_irq(lock); | 881 | spin_unlock_irq(lock); |
892 | schedule(); | 882 | schedule(); |
883 | if (unlikely(__fatal_signal_pending(tsk))) | ||
884 | goto killed; | ||
893 | spin_lock_irq(lock); | 885 | spin_lock_irq(lock); |
894 | } | 886 | } |
895 | spin_unlock_irq(lock); | 887 | spin_unlock_irq(lock); |
@@ -907,9 +899,11 @@ static int de_thread(struct task_struct *tsk) | |||
907 | write_lock_irq(&tasklist_lock); | 899 | write_lock_irq(&tasklist_lock); |
908 | if (likely(leader->exit_state)) | 900 | if (likely(leader->exit_state)) |
909 | break; | 901 | break; |
910 | __set_current_state(TASK_UNINTERRUPTIBLE); | 902 | __set_current_state(TASK_KILLABLE); |
911 | write_unlock_irq(&tasklist_lock); | 903 | write_unlock_irq(&tasklist_lock); |
912 | schedule(); | 904 | schedule(); |
905 | if (unlikely(__fatal_signal_pending(tsk))) | ||
906 | goto killed; | ||
913 | } | 907 | } |
914 | 908 | ||
915 | /* | 909 | /* |
@@ -1003,40 +997,14 @@ no_thread_group: | |||
1003 | 997 | ||
1004 | BUG_ON(!thread_group_leader(tsk)); | 998 | BUG_ON(!thread_group_leader(tsk)); |
1005 | return 0; | 999 | return 0; |
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * These functions flush out all traces of the currently running executable | ||
1010 | * so that a new one can be started | ||
1011 | */ | ||
1012 | static void flush_old_files(struct files_struct * files) | ||
1013 | { | ||
1014 | long j = -1; | ||
1015 | struct fdtable *fdt; | ||
1016 | |||
1017 | spin_lock(&files->file_lock); | ||
1018 | for (;;) { | ||
1019 | unsigned long set, i; | ||
1020 | |||
1021 | j++; | ||
1022 | i = j * BITS_PER_LONG; | ||
1023 | fdt = files_fdtable(files); | ||
1024 | if (i >= fdt->max_fds) | ||
1025 | break; | ||
1026 | set = fdt->close_on_exec[j]; | ||
1027 | if (!set) | ||
1028 | continue; | ||
1029 | fdt->close_on_exec[j] = 0; | ||
1030 | spin_unlock(&files->file_lock); | ||
1031 | for ( ; set ; i++,set >>= 1) { | ||
1032 | if (set & 1) { | ||
1033 | sys_close(i); | ||
1034 | } | ||
1035 | } | ||
1036 | spin_lock(&files->file_lock); | ||
1037 | 1000 | ||
1038 | } | 1001 | killed: |
1039 | spin_unlock(&files->file_lock); | 1002 | /* protects against exit_notify() and __exit_signal() */ |
1003 | read_lock(&tasklist_lock); | ||
1004 | sig->group_exit_task = NULL; | ||
1005 | sig->notify_count = 0; | ||
1006 | read_unlock(&tasklist_lock); | ||
1007 | return -EAGAIN; | ||
1040 | } | 1008 | } |
1041 | 1009 | ||
1042 | char *get_task_comm(char *buf, struct task_struct *tsk) | 1010 | char *get_task_comm(char *buf, struct task_struct *tsk) |
@@ -1049,6 +1017,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) | |||
1049 | } | 1017 | } |
1050 | EXPORT_SYMBOL_GPL(get_task_comm); | 1018 | EXPORT_SYMBOL_GPL(get_task_comm); |
1051 | 1019 | ||
1020 | /* | ||
1021 | * These functions flush out all traces of the currently running executable | ||
1022 | * so that a new one can be started | ||
1023 | */ | ||
1024 | |||
1052 | void set_task_comm(struct task_struct *tsk, char *buf) | 1025 | void set_task_comm(struct task_struct *tsk, char *buf) |
1053 | { | 1026 | { |
1054 | task_lock(tsk); | 1027 | task_lock(tsk); |
@@ -1135,7 +1108,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1135 | current->sas_ss_sp = current->sas_ss_size = 0; | 1108 | current->sas_ss_sp = current->sas_ss_size = 0; |
1136 | 1109 | ||
1137 | if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) | 1110 | if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) |
1138 | set_dumpable(current->mm, 1); | 1111 | set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); |
1139 | else | 1112 | else |
1140 | set_dumpable(current->mm, suid_dumpable); | 1113 | set_dumpable(current->mm, suid_dumpable); |
1141 | 1114 | ||
@@ -1170,7 +1143,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1170 | current->self_exec_id++; | 1143 | current->self_exec_id++; |
1171 | 1144 | ||
1172 | flush_signal_handlers(current, 0); | 1145 | flush_signal_handlers(current, 0); |
1173 | flush_old_files(current->files); | 1146 | do_close_on_exec(current->files); |
1174 | } | 1147 | } |
1175 | EXPORT_SYMBOL(setup_new_exec); | 1148 | EXPORT_SYMBOL(setup_new_exec); |
1176 | 1149 | ||
@@ -1631,353 +1604,6 @@ void set_binfmt(struct linux_binfmt *new) | |||
1631 | 1604 | ||
1632 | EXPORT_SYMBOL(set_binfmt); | 1605 | EXPORT_SYMBOL(set_binfmt); |
1633 | 1606 | ||
1634 | static int expand_corename(struct core_name *cn) | ||
1635 | { | ||
1636 | char *old_corename = cn->corename; | ||
1637 | |||
1638 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
1639 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
1640 | |||
1641 | if (!cn->corename) { | ||
1642 | kfree(old_corename); | ||
1643 | return -ENOMEM; | ||
1644 | } | ||
1645 | |||
1646 | return 0; | ||
1647 | } | ||
1648 | |||
1649 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
1650 | { | ||
1651 | char *cur; | ||
1652 | int need; | ||
1653 | int ret; | ||
1654 | va_list arg; | ||
1655 | |||
1656 | va_start(arg, fmt); | ||
1657 | need = vsnprintf(NULL, 0, fmt, arg); | ||
1658 | va_end(arg); | ||
1659 | |||
1660 | if (likely(need < cn->size - cn->used - 1)) | ||
1661 | goto out_printf; | ||
1662 | |||
1663 | ret = expand_corename(cn); | ||
1664 | if (ret) | ||
1665 | goto expand_fail; | ||
1666 | |||
1667 | out_printf: | ||
1668 | cur = cn->corename + cn->used; | ||
1669 | va_start(arg, fmt); | ||
1670 | vsnprintf(cur, need + 1, fmt, arg); | ||
1671 | va_end(arg); | ||
1672 | cn->used += need; | ||
1673 | return 0; | ||
1674 | |||
1675 | expand_fail: | ||
1676 | return ret; | ||
1677 | } | ||
1678 | |||
1679 | static void cn_escape(char *str) | ||
1680 | { | ||
1681 | for (; *str; str++) | ||
1682 | if (*str == '/') | ||
1683 | *str = '!'; | ||
1684 | } | ||
1685 | |||
1686 | static int cn_print_exe_file(struct core_name *cn) | ||
1687 | { | ||
1688 | struct file *exe_file; | ||
1689 | char *pathbuf, *path; | ||
1690 | int ret; | ||
1691 | |||
1692 | exe_file = get_mm_exe_file(current->mm); | ||
1693 | if (!exe_file) { | ||
1694 | char *commstart = cn->corename + cn->used; | ||
1695 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
1696 | cn_escape(commstart); | ||
1697 | return ret; | ||
1698 | } | ||
1699 | |||
1700 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | ||
1701 | if (!pathbuf) { | ||
1702 | ret = -ENOMEM; | ||
1703 | goto put_exe_file; | ||
1704 | } | ||
1705 | |||
1706 | path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); | ||
1707 | if (IS_ERR(path)) { | ||
1708 | ret = PTR_ERR(path); | ||
1709 | goto free_buf; | ||
1710 | } | ||
1711 | |||
1712 | cn_escape(path); | ||
1713 | |||
1714 | ret = cn_printf(cn, "%s", path); | ||
1715 | |||
1716 | free_buf: | ||
1717 | kfree(pathbuf); | ||
1718 | put_exe_file: | ||
1719 | fput(exe_file); | ||
1720 | return ret; | ||
1721 | } | ||
1722 | |||
1723 | /* format_corename will inspect the pattern parameter, and output a | ||
1724 | * name into corename, which must have space for at least | ||
1725 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | ||
1726 | */ | ||
1727 | static int format_corename(struct core_name *cn, long signr) | ||
1728 | { | ||
1729 | const struct cred *cred = current_cred(); | ||
1730 | const char *pat_ptr = core_pattern; | ||
1731 | int ispipe = (*pat_ptr == '|'); | ||
1732 | int pid_in_pattern = 0; | ||
1733 | int err = 0; | ||
1734 | |||
1735 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
1736 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
1737 | cn->used = 0; | ||
1738 | |||
1739 | if (!cn->corename) | ||
1740 | return -ENOMEM; | ||
1741 | |||
1742 | /* Repeat as long as we have more pattern to process and more output | ||
1743 | space */ | ||
1744 | while (*pat_ptr) { | ||
1745 | if (*pat_ptr != '%') { | ||
1746 | if (*pat_ptr == 0) | ||
1747 | goto out; | ||
1748 | err = cn_printf(cn, "%c", *pat_ptr++); | ||
1749 | } else { | ||
1750 | switch (*++pat_ptr) { | ||
1751 | /* single % at the end, drop that */ | ||
1752 | case 0: | ||
1753 | goto out; | ||
1754 | /* Double percent, output one percent */ | ||
1755 | case '%': | ||
1756 | err = cn_printf(cn, "%c", '%'); | ||
1757 | break; | ||
1758 | /* pid */ | ||
1759 | case 'p': | ||
1760 | pid_in_pattern = 1; | ||
1761 | err = cn_printf(cn, "%d", | ||
1762 | task_tgid_vnr(current)); | ||
1763 | break; | ||
1764 | /* uid */ | ||
1765 | case 'u': | ||
1766 | err = cn_printf(cn, "%d", cred->uid); | ||
1767 | break; | ||
1768 | /* gid */ | ||
1769 | case 'g': | ||
1770 | err = cn_printf(cn, "%d", cred->gid); | ||
1771 | break; | ||
1772 | /* signal that caused the coredump */ | ||
1773 | case 's': | ||
1774 | err = cn_printf(cn, "%ld", signr); | ||
1775 | break; | ||
1776 | /* UNIX time of coredump */ | ||
1777 | case 't': { | ||
1778 | struct timeval tv; | ||
1779 | do_gettimeofday(&tv); | ||
1780 | err = cn_printf(cn, "%lu", tv.tv_sec); | ||
1781 | break; | ||
1782 | } | ||
1783 | /* hostname */ | ||
1784 | case 'h': { | ||
1785 | char *namestart = cn->corename + cn->used; | ||
1786 | down_read(&uts_sem); | ||
1787 | err = cn_printf(cn, "%s", | ||
1788 | utsname()->nodename); | ||
1789 | up_read(&uts_sem); | ||
1790 | cn_escape(namestart); | ||
1791 | break; | ||
1792 | } | ||
1793 | /* executable */ | ||
1794 | case 'e': { | ||
1795 | char *commstart = cn->corename + cn->used; | ||
1796 | err = cn_printf(cn, "%s", current->comm); | ||
1797 | cn_escape(commstart); | ||
1798 | break; | ||
1799 | } | ||
1800 | case 'E': | ||
1801 | err = cn_print_exe_file(cn); | ||
1802 | break; | ||
1803 | /* core limit size */ | ||
1804 | case 'c': | ||
1805 | err = cn_printf(cn, "%lu", | ||
1806 | rlimit(RLIMIT_CORE)); | ||
1807 | break; | ||
1808 | default: | ||
1809 | break; | ||
1810 | } | ||
1811 | ++pat_ptr; | ||
1812 | } | ||
1813 | |||
1814 | if (err) | ||
1815 | return err; | ||
1816 | } | ||
1817 | |||
1818 | /* Backward compatibility with core_uses_pid: | ||
1819 | * | ||
1820 | * If core_pattern does not include a %p (as is the default) | ||
1821 | * and core_uses_pid is set, then .%pid will be appended to | ||
1822 | * the filename. Do not do this for piped commands. */ | ||
1823 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | ||
1824 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); | ||
1825 | if (err) | ||
1826 | return err; | ||
1827 | } | ||
1828 | out: | ||
1829 | return ispipe; | ||
1830 | } | ||
1831 | |||
1832 | static int zap_process(struct task_struct *start, int exit_code) | ||
1833 | { | ||
1834 | struct task_struct *t; | ||
1835 | int nr = 0; | ||
1836 | |||
1837 | start->signal->flags = SIGNAL_GROUP_EXIT; | ||
1838 | start->signal->group_exit_code = exit_code; | ||
1839 | start->signal->group_stop_count = 0; | ||
1840 | |||
1841 | t = start; | ||
1842 | do { | ||
1843 | task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); | ||
1844 | if (t != current && t->mm) { | ||
1845 | sigaddset(&t->pending.signal, SIGKILL); | ||
1846 | signal_wake_up(t, 1); | ||
1847 | nr++; | ||
1848 | } | ||
1849 | } while_each_thread(start, t); | ||
1850 | |||
1851 | return nr; | ||
1852 | } | ||
1853 | |||
1854 | static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, | ||
1855 | struct core_state *core_state, int exit_code) | ||
1856 | { | ||
1857 | struct task_struct *g, *p; | ||
1858 | unsigned long flags; | ||
1859 | int nr = -EAGAIN; | ||
1860 | |||
1861 | spin_lock_irq(&tsk->sighand->siglock); | ||
1862 | if (!signal_group_exit(tsk->signal)) { | ||
1863 | mm->core_state = core_state; | ||
1864 | nr = zap_process(tsk, exit_code); | ||
1865 | } | ||
1866 | spin_unlock_irq(&tsk->sighand->siglock); | ||
1867 | if (unlikely(nr < 0)) | ||
1868 | return nr; | ||
1869 | |||
1870 | if (atomic_read(&mm->mm_users) == nr + 1) | ||
1871 | goto done; | ||
1872 | /* | ||
1873 | * We should find and kill all tasks which use this mm, and we should | ||
1874 | * count them correctly into ->nr_threads. We don't take tasklist | ||
1875 | * lock, but this is safe wrt: | ||
1876 | * | ||
1877 | * fork: | ||
1878 | * None of sub-threads can fork after zap_process(leader). All | ||
1879 | * processes which were created before this point should be | ||
1880 | * visible to zap_threads() because copy_process() adds the new | ||
1881 | * process to the tail of init_task.tasks list, and lock/unlock | ||
1882 | * of ->siglock provides a memory barrier. | ||
1883 | * | ||
1884 | * do_exit: | ||
1885 | * The caller holds mm->mmap_sem. This means that the task which | ||
1886 | * uses this mm can't pass exit_mm(), so it can't exit or clear | ||
1887 | * its ->mm. | ||
1888 | * | ||
1889 | * de_thread: | ||
1890 | * It does list_replace_rcu(&leader->tasks, ¤t->tasks), | ||
1891 | * we must see either old or new leader, this does not matter. | ||
1892 | * However, it can change p->sighand, so lock_task_sighand(p) | ||
1893 | * must be used. Since p->mm != NULL and we hold ->mmap_sem | ||
1894 | * it can't fail. | ||
1895 | * | ||
1896 | * Note also that "g" can be the old leader with ->mm == NULL | ||
1897 | * and already unhashed and thus removed from ->thread_group. | ||
1898 | * This is OK, __unhash_process()->list_del_rcu() does not | ||
1899 | * clear the ->next pointer, we will find the new leader via | ||
1900 | * next_thread(). | ||
1901 | */ | ||
1902 | rcu_read_lock(); | ||
1903 | for_each_process(g) { | ||
1904 | if (g == tsk->group_leader) | ||
1905 | continue; | ||
1906 | if (g->flags & PF_KTHREAD) | ||
1907 | continue; | ||
1908 | p = g; | ||
1909 | do { | ||
1910 | if (p->mm) { | ||
1911 | if (unlikely(p->mm == mm)) { | ||
1912 | lock_task_sighand(p, &flags); | ||
1913 | nr += zap_process(p, exit_code); | ||
1914 | unlock_task_sighand(p, &flags); | ||
1915 | } | ||
1916 | break; | ||
1917 | } | ||
1918 | } while_each_thread(g, p); | ||
1919 | } | ||
1920 | rcu_read_unlock(); | ||
1921 | done: | ||
1922 | atomic_set(&core_state->nr_threads, nr); | ||
1923 | return nr; | ||
1924 | } | ||
1925 | |||
1926 | static int coredump_wait(int exit_code, struct core_state *core_state) | ||
1927 | { | ||
1928 | struct task_struct *tsk = current; | ||
1929 | struct mm_struct *mm = tsk->mm; | ||
1930 | int core_waiters = -EBUSY; | ||
1931 | |||
1932 | init_completion(&core_state->startup); | ||
1933 | core_state->dumper.task = tsk; | ||
1934 | core_state->dumper.next = NULL; | ||
1935 | |||
1936 | down_write(&mm->mmap_sem); | ||
1937 | if (!mm->core_state) | ||
1938 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
1939 | up_write(&mm->mmap_sem); | ||
1940 | |||
1941 | if (core_waiters > 0) { | ||
1942 | struct core_thread *ptr; | ||
1943 | |||
1944 | wait_for_completion(&core_state->startup); | ||
1945 | /* | ||
1946 | * Wait for all the threads to become inactive, so that | ||
1947 | * all the thread context (extended register state, like | ||
1948 | * fpu etc) gets copied to the memory. | ||
1949 | */ | ||
1950 | ptr = core_state->dumper.next; | ||
1951 | while (ptr != NULL) { | ||
1952 | wait_task_inactive(ptr->task, 0); | ||
1953 | ptr = ptr->next; | ||
1954 | } | ||
1955 | } | ||
1956 | |||
1957 | return core_waiters; | ||
1958 | } | ||
1959 | |||
1960 | static void coredump_finish(struct mm_struct *mm) | ||
1961 | { | ||
1962 | struct core_thread *curr, *next; | ||
1963 | struct task_struct *task; | ||
1964 | |||
1965 | next = mm->core_state->dumper.next; | ||
1966 | while ((curr = next) != NULL) { | ||
1967 | next = curr->next; | ||
1968 | task = curr->task; | ||
1969 | /* | ||
1970 | * see exit_mm(), curr->task must not see | ||
1971 | * ->task == NULL before we read ->next. | ||
1972 | */ | ||
1973 | smp_mb(); | ||
1974 | curr->task = NULL; | ||
1975 | wake_up_process(task); | ||
1976 | } | ||
1977 | |||
1978 | mm->core_state = NULL; | ||
1979 | } | ||
1980 | |||
1981 | /* | 1607 | /* |
1982 | * set_dumpable converts traditional three-value dumpable to two flags and | 1608 | * set_dumpable converts traditional three-value dumpable to two flags and |
1983 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but | 1609 | * stores them into mm->flags. It modifies lower two bits of mm->flags, but |
@@ -2019,7 +1645,7 @@ void set_dumpable(struct mm_struct *mm, int value) | |||
2019 | } | 1645 | } |
2020 | } | 1646 | } |
2021 | 1647 | ||
2022 | static int __get_dumpable(unsigned long mm_flags) | 1648 | int __get_dumpable(unsigned long mm_flags) |
2023 | { | 1649 | { |
2024 | int ret; | 1650 | int ret; |
2025 | 1651 | ||
@@ -2032,293 +1658,6 @@ int get_dumpable(struct mm_struct *mm) | |||
2032 | return __get_dumpable(mm->flags); | 1658 | return __get_dumpable(mm->flags); |
2033 | } | 1659 | } |
2034 | 1660 | ||
2035 | static void wait_for_dump_helpers(struct file *file) | ||
2036 | { | ||
2037 | struct pipe_inode_info *pipe; | ||
2038 | |||
2039 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
2040 | |||
2041 | pipe_lock(pipe); | ||
2042 | pipe->readers++; | ||
2043 | pipe->writers--; | ||
2044 | |||
2045 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
2046 | wake_up_interruptible_sync(&pipe->wait); | ||
2047 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
2048 | pipe_wait(pipe); | ||
2049 | } | ||
2050 | |||
2051 | pipe->readers--; | ||
2052 | pipe->writers++; | ||
2053 | pipe_unlock(pipe); | ||
2054 | |||
2055 | } | ||
2056 | |||
2057 | |||
2058 | /* | ||
2059 | * umh_pipe_setup | ||
2060 | * helper function to customize the process used | ||
2061 | * to collect the core in userspace. Specifically | ||
2062 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
2063 | * for the process. Returns 0 on success, or | ||
2064 | * PTR_ERR on failure. | ||
2065 | * Note that it also sets the core limit to 1. This | ||
2066 | * is a special value that we use to trap recursive | ||
2067 | * core dumps | ||
2068 | */ | ||
2069 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | ||
2070 | { | ||
2071 | struct file *files[2]; | ||
2072 | struct fdtable *fdt; | ||
2073 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
2074 | struct files_struct *cf = current->files; | ||
2075 | int err = create_pipe_files(files, 0); | ||
2076 | if (err) | ||
2077 | return err; | ||
2078 | |||
2079 | cp->file = files[1]; | ||
2080 | |||
2081 | sys_close(0); | ||
2082 | fd_install(0, files[0]); | ||
2083 | spin_lock(&cf->file_lock); | ||
2084 | fdt = files_fdtable(cf); | ||
2085 | __set_open_fd(0, fdt); | ||
2086 | __clear_close_on_exec(0, fdt); | ||
2087 | spin_unlock(&cf->file_lock); | ||
2088 | |||
2089 | /* and disallow core files too */ | ||
2090 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
2091 | |||
2092 | return 0; | ||
2093 | } | ||
2094 | |||
2095 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | ||
2096 | { | ||
2097 | struct core_state core_state; | ||
2098 | struct core_name cn; | ||
2099 | struct mm_struct *mm = current->mm; | ||
2100 | struct linux_binfmt * binfmt; | ||
2101 | const struct cred *old_cred; | ||
2102 | struct cred *cred; | ||
2103 | int retval = 0; | ||
2104 | int flag = 0; | ||
2105 | int ispipe; | ||
2106 | bool need_nonrelative = false; | ||
2107 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
2108 | struct coredump_params cprm = { | ||
2109 | .signr = signr, | ||
2110 | .regs = regs, | ||
2111 | .limit = rlimit(RLIMIT_CORE), | ||
2112 | /* | ||
2113 | * We must use the same mm->flags while dumping core to avoid | ||
2114 | * inconsistency of bit flags, since this flag is not protected | ||
2115 | * by any locks. | ||
2116 | */ | ||
2117 | .mm_flags = mm->flags, | ||
2118 | }; | ||
2119 | |||
2120 | audit_core_dumps(signr); | ||
2121 | |||
2122 | binfmt = mm->binfmt; | ||
2123 | if (!binfmt || !binfmt->core_dump) | ||
2124 | goto fail; | ||
2125 | if (!__get_dumpable(cprm.mm_flags)) | ||
2126 | goto fail; | ||
2127 | |||
2128 | cred = prepare_creds(); | ||
2129 | if (!cred) | ||
2130 | goto fail; | ||
2131 | /* | ||
2132 | * We cannot trust fsuid as being the "true" uid of the process | ||
2133 | * nor do we know its entire history. We only know it was tainted | ||
2134 | * so we dump it as root in mode 2, and only into a controlled | ||
2135 | * environment (pipe handler or fully qualified path). | ||
2136 | */ | ||
2137 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { | ||
2138 | /* Setuid core dump mode */ | ||
2139 | flag = O_EXCL; /* Stop rewrite attacks */ | ||
2140 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ | ||
2141 | need_nonrelative = true; | ||
2142 | } | ||
2143 | |||
2144 | retval = coredump_wait(exit_code, &core_state); | ||
2145 | if (retval < 0) | ||
2146 | goto fail_creds; | ||
2147 | |||
2148 | old_cred = override_creds(cred); | ||
2149 | |||
2150 | /* | ||
2151 | * Clear any false indication of pending signals that might | ||
2152 | * be seen by the filesystem code called to write the core file. | ||
2153 | */ | ||
2154 | clear_thread_flag(TIF_SIGPENDING); | ||
2155 | |||
2156 | ispipe = format_corename(&cn, signr); | ||
2157 | |||
2158 | if (ispipe) { | ||
2159 | int dump_count; | ||
2160 | char **helper_argv; | ||
2161 | |||
2162 | if (ispipe < 0) { | ||
2163 | printk(KERN_WARNING "format_corename failed\n"); | ||
2164 | printk(KERN_WARNING "Aborting core\n"); | ||
2165 | goto fail_corename; | ||
2166 | } | ||
2167 | |||
2168 | if (cprm.limit == 1) { | ||
2169 | /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. | ||
2170 | * | ||
2171 | * Normally core limits are irrelevant to pipes, since | ||
2172 | * we're not writing to the file system, but we use | ||
2173 | * cprm.limit of 1 here as a special value, this is a | ||
2174 | * consistent way to catch recursive crashes. | ||
2175 | * We can still crash if the core_pattern binary sets | ||
2176 | * RLIM_CORE = !1, but it runs as root, and can do | ||
2177 | * lots of stupid things. | ||
2178 | * | ||
2179 | * Note that we use task_tgid_vnr here to grab the pid | ||
2180 | * of the process group leader. That way we get the | ||
2181 | * right pid if a thread in a multi-threaded | ||
2182 | * core_pattern process dies. | ||
2183 | */ | ||
2184 | printk(KERN_WARNING | ||
2185 | "Process %d(%s) has RLIMIT_CORE set to 1\n", | ||
2186 | task_tgid_vnr(current), current->comm); | ||
2187 | printk(KERN_WARNING "Aborting core\n"); | ||
2188 | goto fail_unlock; | ||
2189 | } | ||
2190 | cprm.limit = RLIM_INFINITY; | ||
2191 | |||
2192 | dump_count = atomic_inc_return(&core_dump_count); | ||
2193 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
2194 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
2195 | task_tgid_vnr(current), current->comm); | ||
2196 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2197 | goto fail_dropcount; | ||
2198 | } | ||
2199 | |||
2200 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | ||
2201 | if (!helper_argv) { | ||
2202 | printk(KERN_WARNING "%s failed to allocate memory\n", | ||
2203 | __func__); | ||
2204 | goto fail_dropcount; | ||
2205 | } | ||
2206 | |||
2207 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, | ||
2208 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | ||
2209 | NULL, &cprm); | ||
2210 | argv_free(helper_argv); | ||
2211 | if (retval) { | ||
2212 | printk(KERN_INFO "Core dump to %s pipe failed\n", | ||
2213 | cn.corename); | ||
2214 | goto close_fail; | ||
2215 | } | ||
2216 | } else { | ||
2217 | struct inode *inode; | ||
2218 | |||
2219 | if (cprm.limit < binfmt->min_coredump) | ||
2220 | goto fail_unlock; | ||
2221 | |||
2222 | if (need_nonrelative && cn.corename[0] != '/') { | ||
2223 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ | ||
2224 | "to fully qualified path!\n", | ||
2225 | task_tgid_vnr(current), current->comm); | ||
2226 | printk(KERN_WARNING "Skipping core dump\n"); | ||
2227 | goto fail_unlock; | ||
2228 | } | ||
2229 | |||
2230 | cprm.file = filp_open(cn.corename, | ||
2231 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | ||
2232 | 0600); | ||
2233 | if (IS_ERR(cprm.file)) | ||
2234 | goto fail_unlock; | ||
2235 | |||
2236 | inode = cprm.file->f_path.dentry->d_inode; | ||
2237 | if (inode->i_nlink > 1) | ||
2238 | goto close_fail; | ||
2239 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
2240 | goto close_fail; | ||
2241 | /* | ||
2242 | * AK: actually i see no reason to not allow this for named | ||
2243 | * pipes etc, but keep the previous behaviour for now. | ||
2244 | */ | ||
2245 | if (!S_ISREG(inode->i_mode)) | ||
2246 | goto close_fail; | ||
2247 | /* | ||
2248 | * Don't allow local users to get cute and trick others to coredump | ||
2249 | * into their pre-created files. | ||
2250 | */ | ||
2251 | if (!uid_eq(inode->i_uid, current_fsuid())) | ||
2252 | goto close_fail; | ||
2253 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
2254 | goto close_fail; | ||
2255 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
2256 | goto close_fail; | ||
2257 | } | ||
2258 | |||
2259 | retval = binfmt->core_dump(&cprm); | ||
2260 | if (retval) | ||
2261 | current->signal->group_exit_code |= 0x80; | ||
2262 | |||
2263 | if (ispipe && core_pipe_limit) | ||
2264 | wait_for_dump_helpers(cprm.file); | ||
2265 | close_fail: | ||
2266 | if (cprm.file) | ||
2267 | filp_close(cprm.file, NULL); | ||
2268 | fail_dropcount: | ||
2269 | if (ispipe) | ||
2270 | atomic_dec(&core_dump_count); | ||
2271 | fail_unlock: | ||
2272 | kfree(cn.corename); | ||
2273 | fail_corename: | ||
2274 | coredump_finish(mm); | ||
2275 | revert_creds(old_cred); | ||
2276 | fail_creds: | ||
2277 | put_cred(cred); | ||
2278 | fail: | ||
2279 | return; | ||
2280 | } | ||
2281 | |||
2282 | /* | ||
2283 | * Core dumping helper functions. These are the only things you should | ||
2284 | * do on a core-file: use only these functions to write out all the | ||
2285 | * necessary info. | ||
2286 | */ | ||
2287 | int dump_write(struct file *file, const void *addr, int nr) | ||
2288 | { | ||
2289 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2290 | } | ||
2291 | EXPORT_SYMBOL(dump_write); | ||
2292 | |||
2293 | int dump_seek(struct file *file, loff_t off) | ||
2294 | { | ||
2295 | int ret = 1; | ||
2296 | |||
2297 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2298 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2299 | return 0; | ||
2300 | } else { | ||
2301 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2302 | |||
2303 | if (!buf) | ||
2304 | return 0; | ||
2305 | while (off > 0) { | ||
2306 | unsigned long n = off; | ||
2307 | |||
2308 | if (n > PAGE_SIZE) | ||
2309 | n = PAGE_SIZE; | ||
2310 | if (!dump_write(file, buf, n)) { | ||
2311 | ret = 0; | ||
2312 | break; | ||
2313 | } | ||
2314 | off -= n; | ||
2315 | } | ||
2316 | free_page((unsigned long)buf); | ||
2317 | } | ||
2318 | return ret; | ||
2319 | } | ||
2320 | EXPORT_SYMBOL(dump_seek); | ||
2321 | |||
2322 | #ifdef __ARCH_WANT_SYS_EXECVE | 1661 | #ifdef __ARCH_WANT_SYS_EXECVE |
2323 | SYSCALL_DEFINE3(execve, | 1662 | SYSCALL_DEFINE3(execve, |
2324 | const char __user *, filename, | 1663 | const char __user *, filename, |