1 files changed, 686 insertions, 0 deletions
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 00000000000..f045bbad682
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+#include <trace/events/task.h>
+#include "internal.h"
+#include <trace/events/sched.h>
+int core_uses_pid;      /* non-zero: format_corename() appends ".<pid>" when core_pattern has no %p */
+char core_pattern[CORENAME_MAX_SIZE] = "core";  /* name template; a leading '|' selects the pipe-helper path */
+unsigned int core_pipe_limit;   /* max concurrent pipe dumps checked in do_coredump(); 0 skips the check */
+/*
+ * Growable buffer in which the core file name (or the pipe helper command
+ * line) is assembled from core_pattern.
+ */
+struct core_name {
+        char *corename;         /* heap buffer; NULL once an allocation has failed */
+        int used;               /* bytes written so far, not counting the NUL */
+        int size;               /* bytes currently allocated for corename */
+};
+static atomic_t call_count = ATOMIC_INIT(1);    /* buffer size multiplier (x CORENAME_MAX_SIZE); bumped by expand_corename() */
+/* The maximal length of core_pattern is also specified in sysctl.c */
+/*
+ * Enlarge the name buffer.  The new size is CORENAME_MAX_SIZE multiplied by
+ * the freshly incremented global call_count, so later dumps, which size
+ * their initial buffer from call_count, start out larger as well.
+ *
+ * Returns 0 on success.  On allocation failure the old buffer is freed,
+ * cn->corename is left NULL and -ENOMEM is returned.
+ */
+static int expand_corename(struct core_name *cn)
+{
+        char *grown;
+        cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+        grown = krealloc(cn->corename, cn->size, GFP_KERNEL);
+        if (!grown)
+                kfree(cn->corename);
+        cn->corename = grown;
+        return grown ? 0 : -ENOMEM;
+}
+/*
+ * Append printf-style formatted text to the name buffer, growing it as
+ * needed.
+ *
+ * Returns 0 on success or the error from expand_corename() when the buffer
+ * cannot be grown; in that case nothing has been appended.
+ */
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+        va_list arg;
+        int need;
+        int ret;
+        /* First pass only measures the formatted length (without the NUL). */
+        va_start(arg, fmt);
+        need = vsnprintf(NULL, 0, fmt, arg);
+        va_end(arg);
+        /*
+         * A single expansion grows the buffer by a fixed step, which may
+         * still be too small for a long item (e.g. a full executable path).
+         * Keep growing until the text fits, otherwise the write below would
+         * run past the end of the allocation.
+         */
+        while (need >= cn->size - cn->used - 1) {
+                ret = expand_corename(cn);
+                if (ret)
+                        return ret;
+        }
+        va_start(arg, fmt);
+        vsnprintf(cn->corename + cn->used, need + 1, fmt, arg);
+        va_end(arg);
+        cn->used += need;
+        return 0;
+}
+/* Replace every '/' in the NUL-terminated string with '!', in place. */
+static void cn_escape(char *str)
+{
+        char *slash = strchr(str, '/');
+        while (slash) {
+                *slash = '!';
+                slash = strchr(slash + 1, '/');
+        }
+}
+/*
+ * Append the path of the current process' executable to the name buffer,
+ * with every '/' turned into '!'.  When the mm has no exe file, the task's
+ * comm followed by " (path unknown)" is appended instead.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary path buffer cannot be
+ * allocated, the d_path() error, or the error from cn_printf().
+ */
+static int cn_print_exe_file(struct core_name *cn)
+{
+        struct file *exe_file;
+        char *pathbuf, *path;
+        int ret;
+        exe_file = get_mm_exe_file(current->mm);
+        if (!exe_file) {
+                /*
+                 * Remember the offset, not a pointer: cn_printf() may
+                 * reallocate (or, on failure, free) cn->corename.
+                 */
+                int start = cn->used;
+                ret = cn_printf(cn, "%s (path unknown)", current->comm);
+                if (!ret)
+                        cn_escape(cn->corename + start);
+                return ret;
+        }
+        pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+        if (!pathbuf) {
+                ret = -ENOMEM;
+                goto put_exe_file;
+        }
+        path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+        if (IS_ERR(path)) {
+                ret = PTR_ERR(path);
+                goto free_buf;
+        }
+        /* Escape in the scratch buffer, before it is copied into the name. */
+        cn_escape(path);
+        ret = cn_printf(cn, "%s", path);
+free_buf:
+        kfree(pathbuf);
+put_exe_file:
+        fput(exe_file);
+        return ret;
+}
+/*
+ * format_corename - expand core_pattern into a freshly allocated name
+ * @cn:    receives the buffer; cn->corename is owned by the caller on success
+ * @signr: signal number substituted for %s
+ *
+ * Returns 1 when core_pattern starts with '|' (pipe helper), 0 for a plain
+ * file name, or a negative errno.  On error the buffer has already been
+ * released and cn->corename is NULL, so the caller may kfree() it or not.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+        const struct cred *cred = current_cred();
+        const char *pat_ptr = core_pattern;
+        int ispipe = (*pat_ptr == '|');
+        int pid_in_pattern = 0;
+        int err = 0;
+        cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+        cn->corename = kmalloc(cn->size, GFP_KERNEL);
+        cn->used = 0;
+        if (!cn->corename)
+                return -ENOMEM;
+        /* Keep the result a valid string even if nothing gets appended. */
+        cn->corename[0] = '\0';
+        /* Walk the pattern, copying literals and expanding % specifiers. */
+        while (*pat_ptr) {
+                if (*pat_ptr != '%') {
+                        err = cn_printf(cn, "%c", *pat_ptr++);
+                } else {
+                        switch (*++pat_ptr) {
+                        /* single % at the end, drop that */
+                        case 0:
+                                goto out;
+                        /* Double percent, output one percent */
+                        case '%':
+                                err = cn_printf(cn, "%c", '%');
+                                break;
+                        /* pid */
+                        case 'p':
+                                pid_in_pattern = 1;
+                                err = cn_printf(cn, "%d",
+                                              task_tgid_vnr(current));
+                                break;
+                        /*
+                         * uid
+                         * NOTE(review): cred->uid/gid are passed straight to
+                         * "%d"; confirm this is valid for the uid type used
+                         * in this tree.
+                         */
+                        case 'u':
+                                err = cn_printf(cn, "%d", cred->uid);
+                                break;
+                        /* gid */
+                        case 'g':
+                                err = cn_printf(cn, "%d", cred->gid);
+                                break;
+                        /* signal that caused the coredump */
+                        case 's':
+                                err = cn_printf(cn, "%ld", signr);
+                                break;
+                        /* UNIX time of coredump */
+                        case 't': {
+                                struct timeval tv;
+                                do_gettimeofday(&tv);
+                                err = cn_printf(cn, "%lu", tv.tv_sec);
+                                break;
+                        }
+                        /* hostname */
+                        case 'h': {
+                                /*
+                                 * Keep an offset: cn_printf() may move or
+                                 * free the buffer.
+                                 */
+                                int start = cn->used;
+                                down_read(&uts_sem);
+                                err = cn_printf(cn, "%s",
+                                              utsname()->nodename);
+                                up_read(&uts_sem);
+                                if (!err)
+                                        cn_escape(cn->corename + start);
+                                break;
+                        }
+                        /* executable */
+                        case 'e': {
+                                int start = cn->used;
+                                err = cn_printf(cn, "%s", current->comm);
+                                if (!err)
+                                        cn_escape(cn->corename + start);
+                                break;
+                        }
+                        /* path of the executable, '/' replaced by '!' */
+                        case 'E':
+                                err = cn_print_exe_file(cn);
+                                break;
+                        /* core limit size */
+                        case 'c':
+                                err = cn_printf(cn, "%lu",
+                                              rlimit(RLIMIT_CORE));
+                                break;
+                        /* unknown specifier: silently dropped */
+                        default:
+                                break;
+                        }
+                        ++pat_ptr;
+                }
+                if (err)
+                        goto fail;
+        }
+        /* Backward compatibility with core_uses_pid:
+         *
+         * If core_pattern does not include a %p (as is the default)
+         * and core_uses_pid is set, then .%pid will be appended to
+         * the filename. Do not do this for piped commands. */
+        if (!ispipe && !pid_in_pattern && core_uses_pid) {
+                err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+                if (err)
+                        goto fail;
+        }
+out:
+        return ispipe;
+fail:
+        /* kfree(NULL) is a no-op, so this also covers a failed expansion. */
+        kfree(cn->corename);
+        cn->corename = NULL;
+        return err;
+}
+/*
+ * Put the thread group of @start into group-exit state with @exit_code and
+ * queue SIGKILL for each of its threads that still has an mm, except the
+ * caller itself.  Returns how many threads were signalled.
+ */
+static int zap_process(struct task_struct *start, int exit_code)
+{
+        struct task_struct *thread = start;
+        int zapped = 0;
+        start->signal->flags = SIGNAL_GROUP_EXIT;
+        start->signal->group_exit_code = exit_code;
+        start->signal->group_stop_count = 0;
+        do {
+                task_clear_jobctl_pending(thread, JOBCTL_PENDING_MASK);
+                /* Skip ourselves and threads that have no mm. */
+                if (thread == current || !thread->mm)
+                        continue;
+                sigaddset(&thread->pending.signal, SIGKILL);
+                signal_wake_up(thread, 1);
+                zapped++;
+        } while_each_thread(start, thread);
+        return zapped;
+}
+/*
+ * Start the dump for @mm: publish @core_state in mm->core_state and kill
+ * every other task using @mm, first in the caller's own thread group and
+ * then, if mm_users shows more users, in every other process.
+ *
+ * Returns the number of tasks signalled (also stored in
+ * core_state->nr_threads), or -EAGAIN when the caller's thread group is
+ * already exiting.
+ */
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+                                struct core_state *core_state, int exit_code)
+{
+        struct task_struct *leader, *thread;
+        unsigned long flags;
+        int nr = -EAGAIN;
+        spin_lock_irq(&tsk->sighand->siglock);
+        if (!signal_group_exit(tsk->signal)) {
+                mm->core_state = core_state;
+                nr = zap_process(tsk, exit_code);
+        }
+        spin_unlock_irq(&tsk->sighand->siglock);
+        if (unlikely(nr < 0))
+                return nr;
+        /* Only our own thread group uses this mm: nothing more to do. */
+        if (atomic_read(&mm->mm_users) == nr + 1)
+                goto done;
+        /*
+         * We should find and kill all tasks which use this mm, and we should
+         * count them correctly into ->nr_threads. We don't take tasklist
+         * lock, but this is safe wrt:
+         *
+         * fork:
+         *      None of sub-threads can fork after zap_process(leader). All
+         *      processes which were created before this point should be
+         *      visible to zap_threads() because copy_process() adds the new
+         *      process to the tail of init_task.tasks list, and lock/unlock
+         *      of ->siglock provides a memory barrier.
+         *
+         * do_exit:
+         *      The caller holds mm->mmap_sem. This means that the task which
+         *      uses this mm can't pass exit_mm(), so it can't exit or clear
+         *      its ->mm.
+         *
+         * de_thread:
+         *      It does list_replace_rcu(&leader->tasks, &current->tasks),
+         *      we must see either old or new leader, this does not matter.
+         *      However, it can change p->sighand, so lock_task_sighand(p)
+         *      must be used. Since p->mm != NULL and we hold ->mmap_sem
+         *      it can't fail.
+         *
+         *      Note also that "g" can be the old leader with ->mm == NULL
+         *      and already unhashed and thus removed from ->thread_group.
+         *      This is OK, __unhash_process()->list_del_rcu() does not
+         *      clear the ->next pointer, we will find the new leader via
+         *      next_thread().
+         */
+        rcu_read_lock();
+        for_each_process(leader) {
+                if (leader == tsk->group_leader || (leader->flags & PF_KTHREAD))
+                        continue;
+                thread = leader;
+                do {
+                        /* Look for the first thread that still has an mm. */
+                        if (!thread->mm)
+                                continue;
+                        if (unlikely(thread->mm == mm)) {
+                                lock_task_sighand(thread, &flags);
+                                nr += zap_process(thread, exit_code);
+                                unlock_task_sighand(thread, &flags);
+                        }
+                        /* That thread decides for the whole process. */
+                        break;
+                } while_each_thread(leader, thread);
+        }
+        rcu_read_unlock();
+done:
+        atomic_set(&core_state->nr_threads, nr);
+        return nr;
+}
+/*
+ * Register the current task as the dumper in @core_state, kill all other
+ * users of its mm and, if there are any, wait until they have reported in
+ * and become inactive.
+ *
+ * Returns the number of tasks waited for (may be 0), -EBUSY when
+ * mm->core_state is already set, or the error from zap_threads().
+ */
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+        struct task_struct *self = current;
+        struct mm_struct *mm = self->mm;
+        struct core_thread *waiter;
+        int core_waiters = -EBUSY;
+        init_completion(&core_state->startup);
+        core_state->dumper.task = self;
+        core_state->dumper.next = NULL;
+        down_write(&mm->mmap_sem);
+        if (!mm->core_state)
+                core_waiters = zap_threads(self, mm, core_state, exit_code);
+        up_write(&mm->mmap_sem);
+        if (core_waiters <= 0)
+                return core_waiters;
+        wait_for_completion(&core_state->startup);
+        /*
+         * Wait for all the threads to become inactive, so that
+         * all the thread context (extended register state, like
+         * fpu etc) gets copied to the memory.
+         */
+        for (waiter = core_state->dumper.next; waiter; waiter = waiter->next)
+                wait_task_inactive(waiter->task, 0);
+        return core_waiters;
+}
+/*
+ * End the dump: wake every task queued on mm->core_state's dumper list and
+ * clear mm->core_state.
+ */
+static void coredump_finish(struct mm_struct *mm)
+{
+        struct core_thread *waiter, *following;
+        struct task_struct *sleeper;
+        for (waiter = mm->core_state->dumper.next; waiter; waiter = following) {
+                /* Read both fields before ->task is cleared below. */
+                following = waiter->next;
+                sleeper = waiter->task;
+                /*
+                 * see exit_mm(), curr->task must not see
+                 * ->task == NULL before we read ->next.
+                 */
+                smp_mb();
+                waiter->task = NULL;
+                wake_up_process(sleeper);
+        }
+        mm->core_state = NULL;
+}
+/*
+ * Block on the dump pipe behind @file until no reader other than the
+ * temporary one counted here remains, or until a signal is pending.
+ */
+static void wait_for_dump_helpers(struct file *file)
+{
+        struct pipe_inode_info *pipe = file->f_path.dentry->d_inode->i_pipe;
+        pipe_lock(pipe);
+        /* Count ourselves as a reader and stop counting as a writer. */
+        pipe->readers++;
+        pipe->writers--;
+        for (;;) {
+                if (pipe->readers <= 1 || signal_pending(current))
+                        break;
+                wake_up_interruptible_sync(&pipe->wait);
+                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+                pipe_wait(pipe);
+        }
+        /* Undo the temporary accounting before dropping the lock. */
+        pipe->readers--;
+        pipe->writers++;
+        pipe_unlock(pipe);
+}
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace.  Specifically
+ * it sets up a pipe and installs its read end as
+ * fd 0 (stdin) for the process; the write end is
+ * handed back through the coredump_params in
+ * info->data.  Returns 0 on success, or a negative
+ * errno on failure.
+ * Note that it also sets the core limit to 1.  This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+        struct file *files[2];
+        struct coredump_params *cp = (struct coredump_params *)info->data;
+        int err = create_pipe_files(files, 0);
+        if (err)
+                return err;
+        /* The write end is closed by do_coredump() through cprm.file. */
+        cp->file = files[1];
+        /*
+         * Install the read end as stdin and report a failure to the caller.
+         * NOTE(review): assumes replace_fd() takes its own reference on the
+         * file, so the one from create_pipe_files() is dropped here either
+         * way - confirm against fs/file.c in this tree.
+         */
+        err = replace_fd(0, files[0], 0);
+        fput(files[0]);
+        /* and disallow core files too */
+        current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+        return err;
+}
+/*
+ * do_coredump - dump core for the current process
+ * @signr:     signal that triggered the dump
+ * @exit_code: group exit code handed to the threads being killed
+ * @regs:      register state passed on to the binfmt dumper
+ *
+ * Builds the target name from core_pattern, then either starts the pipe
+ * helper or opens and validates a regular file, and finally calls the
+ * binfmt's ->core_dump().  Nothing is returned; a successful dump sets
+ * bit 0x80 in the group exit code.
+ *
+ * The exit labels unwind in reverse order of acquisition:
+ *   close_fail     - close cprm.file if one was opened
+ *   fail_dropcount - drop the pipe dump counter
+ *   fail_unlock    - free the name buffer, wake the waiting threads,
+ *                    restore the credentials
+ *   fail_creds     - release the prepared credentials
+ */
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+        struct core_state core_state;
+        struct core_name cn;
+        struct mm_struct *mm = current->mm;
+        struct linux_binfmt * binfmt;
+        const struct cred *old_cred;
+        struct cred *cred;
+        int retval = 0;
+        int flag = 0;
+        int ispipe;
+        struct files_struct *displaced;
+        bool need_nonrelative = false;
+        static atomic_t core_dump_count = ATOMIC_INIT(0);
+        struct coredump_params cprm = {
+                .signr = signr,
+                .regs = regs,
+                .limit = rlimit(RLIMIT_CORE),
+                /*
+                 * We must use the same mm->flags while dumping core to avoid
+                 * inconsistency of bit flags, since this flag is not protected
+                 * by any locks.
+                 */
+                .mm_flags = mm->flags,
+        };
+        audit_core_dumps(signr);
+        binfmt = mm->binfmt;
+        if (!binfmt || !binfmt->core_dump)
+                goto fail;
+        if (!__get_dumpable(cprm.mm_flags))
+                goto fail;
+        cred = prepare_creds();
+        if (!cred)
+                goto fail;
+        /*
+         * We cannot trust fsuid as being the "true" uid of the process
+         * nor do we know its entire history. We only know it was tainted
+         * so we dump it as root in mode 2, and only into a controlled
+         * environment (pipe handler or fully qualified path).
+         */
+        if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+                /* Setuid core dump mode */
+                flag = O_EXCL;          /* Stop rewrite attacks */
+                cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
+                need_nonrelative = true;
+        }
+        retval = coredump_wait(exit_code, &core_state);
+        if (retval < 0)
+                goto fail_creds;
+        old_cred = override_creds(cred);
+        /*
+         * Clear any false indication of pending signals that might
+         * be seen by the filesystem code called to write the core file.
+         */
+        clear_thread_flag(TIF_SIGPENDING);
+        ispipe = format_corename(&cn, signr);
+        if (ispipe) {
+                int dump_count;
+                char **helper_argv;
+                if (ispipe < 0) {
+                        printk(KERN_WARNING "format_corename failed\n");
+                        printk(KERN_WARNING "Aborting core\n");
+                        /*
+                         * format_corename() always assigns cn.corename
+                         * (possibly NULL), so go through the kfree() below
+                         * instead of skipping it and leaking the buffer.
+                         */
+                        goto fail_unlock;
+                }
+                if (cprm.limit == 1) {
+                        /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+                         *
+                         * Normally core limits are irrelevant to pipes, since
+                         * we're not writing to the file system, but we use
+                         * cprm.limit of 1 here as a special value, this is a
+                         * consistent way to catch recursive crashes.
+                         * We can still crash if the core_pattern binary sets
+                         * RLIM_CORE = !1, but it runs as root, and can do
+                         * lots of stupid things.
+                         *
+                         * Note that we use task_tgid_vnr here to grab the pid
+                         * of the process group leader.  That way we get the
+                         * right pid if a thread in a multi-threaded
+                         * core_pattern process dies.
+                         */
+                        printk(KERN_WARNING
+                                "Process %d(%s) has RLIMIT_CORE set to 1\n",
+                                task_tgid_vnr(current), current->comm);
+                        printk(KERN_WARNING "Aborting core\n");
+                        goto fail_unlock;
+                }
+                cprm.limit = RLIM_INFINITY;
+                dump_count = atomic_inc_return(&core_dump_count);
+                if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+                        printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+                               task_tgid_vnr(current), current->comm);
+                        printk(KERN_WARNING "Skipping core dump\n");
+                        goto fail_dropcount;
+                }
+                /* Skip the leading '|' of the pattern. */
+                helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+                if (!helper_argv) {
+                        printk(KERN_WARNING "%s failed to allocate memory\n",
+                               __func__);
+                        goto fail_dropcount;
+                }
+                retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+                                        NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+                                        NULL, &cprm);
+                argv_free(helper_argv);
+                if (retval) {
+                        printk(KERN_INFO "Core dump to %s pipe failed\n",
+                               cn.corename);
+                        goto close_fail;
+                }
+        } else {
+                struct inode *inode;
+                if (cprm.limit < binfmt->min_coredump)
+                        goto fail_unlock;
+                if (need_nonrelative && cn.corename[0] != '/') {
+                        printk(KERN_WARNING "Pid %d(%s) can only dump core "
+                                "to fully qualified path!\n",
+                                task_tgid_vnr(current), current->comm);
+                        printk(KERN_WARNING "Skipping core dump\n");
+                        goto fail_unlock;
+                }
+                cprm.file = filp_open(cn.corename,
+                                 O_CREAT | O_RDWR | O_NOFOLLOW | O_LARGEFILE | flag,
+                                 0600);
+                if (IS_ERR(cprm.file))
+                        goto fail_unlock;
+                inode = cprm.file->f_path.dentry->d_inode;
+                /* Refuse hard-linked or already unlinked targets. */
+                if (inode->i_nlink > 1)
+                        goto close_fail;
+                if (d_unhashed(cprm.file->f_path.dentry))
+                        goto close_fail;
+                /*
+                 * AK: actually i see no reason to not allow this for named
+                 * pipes etc, but keep the previous behaviour for now.
+                 */
+                if (!S_ISREG(inode->i_mode))
+                        goto close_fail;
+                /*
+                 * Dont allow local users get cute and trick others to coredump
+                 * into their pre-created files.
+                 */
+                if (!uid_eq(inode->i_uid, current_fsuid()))
+                        goto close_fail;
+                if (!cprm.file->f_op || !cprm.file->f_op->write)
+                        goto close_fail;
+                if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+                        goto close_fail;
+        }
+        /* get us an unshared descriptor table; almost always a no-op */
+        retval = unshare_files(&displaced);
+        if (retval)
+                goto close_fail;
+        if (displaced)
+                put_files_struct(displaced);
+        retval = binfmt->core_dump(&cprm);
+        /* A non-zero result marks the exit code as "core dumped". */
+        if (retval)
+                current->signal->group_exit_code |= 0x80;
+        if (ispipe && core_pipe_limit)
+                wait_for_dump_helpers(cprm.file);
+close_fail:
+        if (cprm.file)
+                filp_close(cprm.file, NULL);
+fail_dropcount:
+        if (ispipe)
+                atomic_dec(&core_dump_count);
+fail_unlock:
+        kfree(cn.corename);
+        coredump_finish(mm);
+        revert_creds(old_cred);
+fail_creds:
+        put_cred(cred);
+fail:
+        return;
+}
+/*
+ * Core dumping helper functions.  These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+/*
+ * Write @nr bytes from @addr to @file at its current position.
+ * Returns 1 when the range passes access_ok() and the write reports
+ * exactly @nr bytes, 0 otherwise.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+        if (!access_ok(VERIFY_READ, addr, nr))
+                return 0;
+        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+/*
+ * Advance the dump output by @off bytes.  Files with a usable ->llseek are
+ * seeked relative to the current position; anything else gets @off zero
+ * bytes written instead.  Returns 1 on success, 0 on failure.
+ */
+int dump_seek(struct file *file, loff_t off)
+{
+        int ret = 1;
+        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+                        return 0;
+        } else {
+                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+                if (!buf)
+                        return 0;
+                while (off > 0) {
+                        /*
+                         * Pick the chunk size in loff_t: assigning off to an
+                         * unsigned long first would truncate it where long
+                         * is 32 bits and could yield a zero-length chunk
+                         * that never advances the loop.
+                         */
+                        unsigned long n = PAGE_SIZE;
+                        if (off < (loff_t)PAGE_SIZE)
+                                n = off;
+                        if (!dump_write(file, buf, n)) {
+                                ret = 0;
+                                break;
+                        }
+                        off -= n;
+                }
+                free_page((unsigned long)buf);
+        }
+        return ret;
+}
+EXPORT_SYMBOL(dump_seek);

diff --git a/fs/coredump.c b/fs/coredump.c new file mode 100644 index 00000000000..f045bbad682 --- /dev/null +++ b/fs/coredump.c
@@ -0,0 +1,686 @@
	1	#include <linux/slab.h>
	2	#include <linux/file.h>
	3	#include <linux/fdtable.h>
	4	#include <linux/mm.h>
	5	#include <linux/stat.h>
	6	#include <linux/fcntl.h>
	7	#include <linux/swap.h>
	8	#include <linux/string.h>
	9	#include <linux/init.h>
	10	#include <linux/pagemap.h>
	11	#include <linux/perf_event.h>
	12	#include <linux/highmem.h>
	13	#include <linux/spinlock.h>
	14	#include <linux/key.h>
	15	#include <linux/personality.h>
	16	#include <linux/binfmts.h>
	17	#include <linux/utsname.h>
	18	#include <linux/pid_namespace.h>
	19	#include <linux/module.h>
	20	#include <linux/namei.h>
	21	#include <linux/mount.h>
	22	#include <linux/security.h>
	23	#include <linux/syscalls.h>
	24	#include <linux/tsacct_kern.h>
	25	#include <linux/cn_proc.h>
	26	#include <linux/audit.h>
	27	#include <linux/tracehook.h>
	28	#include <linux/kmod.h>
	29	#include <linux/fsnotify.h>
	30	#include <linux/fs_struct.h>
	31	#include <linux/pipe_fs_i.h>
	32	#include <linux/oom.h>
	33	#include <linux/compat.h>
	34
	35	#include <asm/uaccess.h>
	36	#include <asm/mmu_context.h>
	37	#include <asm/tlb.h>
	38	#include <asm/exec.h>
	39
	40	#include <trace/events/task.h>
	41	#include "internal.h"
	42
	43	#include <trace/events/sched.h>
	44
	45	int core_uses_pid;
	46	char core_pattern[CORENAME_MAX_SIZE] = "core";
	47	unsigned int core_pipe_limit;
	48
	49	struct core_name {
	50	char *corename;
	51	int used, size;
	52	};
	53	static atomic_t call_count = ATOMIC_INIT(1);
	54
	55	/* The maximal length of core_pattern is also specified in sysctl.c */
	56
	57	static int expand_corename(struct core_name *cn)
	58	{
	59	char *old_corename = cn->corename;
	60
	61	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
	62	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
	63
	64	if (!cn->corename) {
	65	kfree(old_corename);
	66	return -ENOMEM;
	67	}
	68
	69	return 0;
	70	}
	71
	72	static int cn_printf(struct core_name *cn, const char *fmt, ...)
	73	{
	74	char *cur;
	75	int need;
	76	int ret;
	77	va_list arg;
	78
	79	va_start(arg, fmt);
	80	need = vsnprintf(NULL, 0, fmt, arg);
	81	va_end(arg);
	82
	83	if (likely(need < cn->size - cn->used - 1))
	84	goto out_printf;
	85
	86	ret = expand_corename(cn);
	87	if (ret)
	88	goto expand_fail;
	89
	90	out_printf:
	91	cur = cn->corename + cn->used;
	92	va_start(arg, fmt);
	93	vsnprintf(cur, need + 1, fmt, arg);
	94	va_end(arg);
	95	cn->used += need;
	96	return 0;
	97
	98	expand_fail:
	99	return ret;
	100	}
	101
	102	static void cn_escape(char *str)
	103	{
	104	for (; *str; str++)
	105	if (*str == '/')
	106	*str = '!';
	107	}
	108
	109	static int cn_print_exe_file(struct core_name *cn)
	110	{
	111	struct file *exe_file;
	112	char *pathbuf, *path;
	113	int ret;
	114
	115	exe_file = get_mm_exe_file(current->mm);
	116	if (!exe_file) {
	117	char *commstart = cn->corename + cn->used;
	118	ret = cn_printf(cn, "%s (path unknown)", current->comm);
	119	cn_escape(commstart);
	120	return ret;
	121	}
	122
	123	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
	124	if (!pathbuf) {
	125	ret = -ENOMEM;
	126	goto put_exe_file;
	127	}
	128
	129	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
	130	if (IS_ERR(path)) {
	131	ret = PTR_ERR(path);
	132	goto free_buf;
	133	}
	134
	135	cn_escape(path);
	136
	137	ret = cn_printf(cn, "%s", path);
	138
	139	free_buf:
	140	kfree(pathbuf);
	141	put_exe_file:
	142	fput(exe_file);
	143	return ret;
	144	}
	145
	146	/* format_corename will inspect the pattern parameter, and output a
	147	* name into corename, which must have space for at least
	148	* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
	149	*/
	150	static int format_corename(struct core_name *cn, long signr)
	151	{
	152	const struct cred *cred = current_cred();
	153	const char *pat_ptr = core_pattern;
	154	int ispipe = (*pat_ptr == '|');
	155	int pid_in_pattern = 0;
	156	int err = 0;
	157
	158	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
	159	cn->corename = kmalloc(cn->size, GFP_KERNEL);
	160	cn->used = 0;
	161
	162	if (!cn->corename)
	163	return -ENOMEM;
	164
	165	/* Repeat as long as we have more pattern to process and more output
	166	space */
	167	while (*pat_ptr) {
	168	if (*pat_ptr != '%') {
	169	if (*pat_ptr == 0)
	170	goto out;
	171	err = cn_printf(cn, "%c", *pat_ptr++);
	172	} else {
	173	switch (*++pat_ptr) {
	174	/* single % at the end, drop that */
	175	case 0:
	176	goto out;
	177	/* Double percent, output one percent */
	178	case '%':
	179	err = cn_printf(cn, "%c", '%');
	180	break;
	181	/* pid */
	182	case 'p':
	183	pid_in_pattern = 1;
	184	err = cn_printf(cn, "%d",
	185	task_tgid_vnr(current));
	186	break;
	187	/* uid */
	188	case 'u':
	189	err = cn_printf(cn, "%d", cred->uid);
	190	break;
	191	/* gid */
	192	case 'g':
	193	err = cn_printf(cn, "%d", cred->gid);
	194	break;
	195	/* signal that caused the coredump */
	196	case 's':
	197	err = cn_printf(cn, "%ld", signr);
	198	break;
	199	/* UNIX time of coredump */
	200	case 't': {
	201	struct timeval tv;
	202	do_gettimeofday(&tv);
	203	err = cn_printf(cn, "%lu", tv.tv_sec);
	204	break;
	205	}
	206	/* hostname */
	207	case 'h': {
	208	char *namestart = cn->corename + cn->used;
	209	down_read(&uts_sem);
	210	err = cn_printf(cn, "%s",
	211	utsname()->nodename);
	212	up_read(&uts_sem);
	213	cn_escape(namestart);
	214	break;
	215	}
	216	/* executable */
	217	case 'e': {
	218	char *commstart = cn->corename + cn->used;
	219	err = cn_printf(cn, "%s", current->comm);
	220	cn_escape(commstart);
	221	break;
	222	}
	223	case 'E':
	224	err = cn_print_exe_file(cn);
	225	break;
	226	/* core limit size */
	227	case 'c':
	228	err = cn_printf(cn, "%lu",
	229	rlimit(RLIMIT_CORE));
	230	break;
	231	default:
	232	break;
	233	}
	234	++pat_ptr;
	235	}
	236
	237	if (err)
	238	return err;
	239	}
	240
	241	/* Backward compatibility with core_uses_pid:
	242	*
	243	* If core_pattern does not include a %p (as is the default)
	244	* and core_uses_pid is set, then .%pid will be appended to
	245	* the filename. Do not do this for piped commands. */
	246	if (!ispipe && !pid_in_pattern && core_uses_pid) {
	247	err = cn_printf(cn, ".%d", task_tgid_vnr(current));
	248	if (err)
	249	return err;
	250	}
	251	out:
	252	return ispipe;
	253	}
	254
	255	static int zap_process(struct task_struct *start, int exit_code)
	256	{
	257	struct task_struct *t;
	258	int nr = 0;
	259
	260	start->signal->flags = SIGNAL_GROUP_EXIT;
	261	start->signal->group_exit_code = exit_code;
	262	start->signal->group_stop_count = 0;
	263
	264	t = start;
	265	do {
	266	task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
	267	if (t != current && t->mm) {
	268	sigaddset(&t->pending.signal, SIGKILL);
	269	signal_wake_up(t, 1);
	270	nr++;
	271	}
	272	} while_each_thread(start, t);
	273
	274	return nr;
	275	}
	276
	277	static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
	278	struct core_state *core_state, int exit_code)
	279	{
	280	struct task_struct *g, *p;
	281	unsigned long flags;
	282	int nr = -EAGAIN;
	283
	284	spin_lock_irq(&tsk->sighand->siglock);
	285	if (!signal_group_exit(tsk->signal)) {
	286	mm->core_state = core_state;
	287	nr = zap_process(tsk, exit_code);
	288	}
	289	spin_unlock_irq(&tsk->sighand->siglock);
	290	if (unlikely(nr < 0))
	291	return nr;
	292
	293	if (atomic_read(&mm->mm_users) == nr + 1)
	294	goto done;
	295	/*
	296	* We should find and kill all tasks which use this mm, and we should
	297	* count them correctly into ->nr_threads. We don't take tasklist
	298	* lock, but this is safe wrt:
	299	*
	300	* fork:
	301	* None of sub-threads can fork after zap_process(leader). All
	302	* processes which were created before this point should be
	303	* visible to zap_threads() because copy_process() adds the new
	304	* process to the tail of init_task.tasks list, and lock/unlock
	305	* of ->siglock provides a memory barrier.
	306	*
	307	* do_exit:
	308	* The caller holds mm->mmap_sem. This means that the task which
	309	* uses this mm can't pass exit_mm(), so it can't exit or clear
	310	* its ->mm.
	311	*
	312	* de_thread:
	313	* It does list_replace_rcu(&leader->tasks, &current->tasks),
	314	* we must see either old or new leader, this does not matter.
	315	* However, it can change p->sighand, so lock_task_sighand(p)
	316	* must be used. Since p->mm != NULL and we hold ->mmap_sem
	317	* it can't fail.
	318	*
	319	* Note also that "g" can be the old leader with ->mm == NULL
	320	* and already unhashed and thus removed from ->thread_group.
	321	* This is OK, __unhash_process()->list_del_rcu() does not
	322	* clear the ->next pointer, we will find the new leader via
	323	* next_thread().
	324	*/
	325	rcu_read_lock();
	326	for_each_process(g) {
	327	if (g == tsk->group_leader)
	328	continue;
	329	if (g->flags & PF_KTHREAD)
	330	continue;
	331	p = g;
	332	do {
	333	if (p->mm) {
	334	if (unlikely(p->mm == mm)) {
	335	lock_task_sighand(p, &flags);
	336	nr += zap_process(p, exit_code);
	337	unlock_task_sighand(p, &flags);
	338	}
	339	break;
	340	}
	341	} while_each_thread(g, p);
	342	}
	343	rcu_read_unlock();
	344	done:
	345	atomic_set(&core_state->nr_threads, nr);
	346	return nr;
	347	}
	348
	349	static int coredump_wait(int exit_code, struct core_state *core_state)
	350	{
	351	struct task_struct *tsk = current;
	352	struct mm_struct *mm = tsk->mm;
	353	int core_waiters = -EBUSY;
	354
	355	init_completion(&core_state->startup);
	356	core_state->dumper.task = tsk;
	357	core_state->dumper.next = NULL;
	358
	359	down_write(&mm->mmap_sem);
	360	if (!mm->core_state)
	361	core_waiters = zap_threads(tsk, mm, core_state, exit_code);
	362	up_write(&mm->mmap_sem);
	363
	364	if (core_waiters > 0) {
	365	struct core_thread *ptr;
	366
	367	wait_for_completion(&core_state->startup);
	368	/*
	369	* Wait for all the threads to become inactive, so that
	370	* all the thread context (extended register state, like
	371	* fpu etc) gets copied to the memory.
	372	*/
	373	ptr = core_state->dumper.next;
	374	while (ptr != NULL) {
	375	wait_task_inactive(ptr->task, 0);
	376	ptr = ptr->next;
	377	}
	378	}
	379
	380	return core_waiters;
	381	}
	382
	383	static void coredump_finish(struct mm_struct *mm)
	384	{
	385	struct core_thread curr, next;
	386	struct task_struct *task;
	387
	388	next = mm->core_state->dumper.next;
	389	while ((curr = next) != NULL) {
	390	next = curr->next;
	391	task = curr->task;
	392	/*
	393	* see exit_mm(), curr->task must not see
	394	* ->task == NULL before we read ->next.
	395	*/
	396	smp_mb();
	397	curr->task = NULL;
	398	wake_up_process(task);
	399	}
	400
	401	mm->core_state = NULL;
	402	}
	403
	404	static void wait_for_dump_helpers(struct file *file)
	405	{
	406	struct pipe_inode_info *pipe;
	407
	408	pipe = file->f_path.dentry->d_inode->i_pipe;
	409
	410	pipe_lock(pipe);
	411	pipe->readers++;
	412	pipe->writers--;
	413
	414	while ((pipe->readers > 1) && (!signal_pending(current))) {
	415	wake_up_interruptible_sync(&pipe->wait);
	416	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	417	pipe_wait(pipe);
	418	}
	419
	420	pipe->readers--;
	421	pipe->writers++;
	422	pipe_unlock(pipe);
	423
	424	}
	425
	426	/*
	427	* umh_pipe_setup
	428	* helper function to customize the process used
	429	* to collect the core in userspace. Specifically
	430	* it sets up a pipe and installs it as fd 0 (stdin)
	431	* for the process. Returns 0 on success, or
	432	* PTR_ERR on failure.
	433	* Note that it also sets the core limit to 1. This
	434	* is a special value that we use to trap recursive
	435	* core dumps
	436	*/
	437	static int umh_pipe_setup(struct subprocess_info info, struct cred new)
	438	{
	439	struct file *files[2];
	440	struct coredump_params cp = (struct coredump_params )info->data;
	441	int err = create_pipe_files(files, 0);
	442	if (err)
	443	return err;
	444
	445	cp->file = files[1];
	446
	447	replace_fd(0, files[0], 0);
	448	/* and disallow core files too */
	449	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
	450
	451	return 0;
	452	}
	453
	454	void do_coredump(long signr, int exit_code, struct pt_regs *regs)
	455	{
	456	struct core_state core_state;
	457	struct core_name cn;
	458	struct mm_struct *mm = current->mm;
	459	struct linux_binfmt * binfmt;
	460	const struct cred *old_cred;
	461	struct cred *cred;
	462	int retval = 0;
	463	int flag = 0;
	464	int ispipe;
	465	struct files_struct *displaced;
	466	bool need_nonrelative = false;
	467	static atomic_t core_dump_count = ATOMIC_INIT(0);
	468	struct coredump_params cprm = {
	469	.signr = signr,
	470	.regs = regs,
	471	.limit = rlimit(RLIMIT_CORE),
	472	/*
	473	* We must use the same mm->flags while dumping core to avoid
	474	* inconsistency of bit flags, since this flag is not protected
	475	* by any locks.
	476	*/
	477	.mm_flags = mm->flags,
	478	};
	479
	480	audit_core_dumps(signr);
	481
	482	binfmt = mm->binfmt;
	483	if (!binfmt \|\| !binfmt->core_dump)
	484	goto fail;
	485	if (!__get_dumpable(cprm.mm_flags))
	486	goto fail;
	487
	488	cred = prepare_creds();
	489	if (!cred)
	490	goto fail;
	491	/*
	492	* We cannot trust fsuid as being the "true" uid of the process
	493	* nor do we know its entire history. We only know it was tainted
	494	* so we dump it as root in mode 2, and only into a controlled
	495	* environment (pipe handler or fully qualified path).
	496	*/
	497	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
	498	/* Setuid core dump mode */
	499	flag = O_EXCL; /* Stop rewrite attacks */
	500	cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
	501	need_nonrelative = true;
	502	}
	503
	504	retval = coredump_wait(exit_code, &core_state);
	505	if (retval < 0)
	506	goto fail_creds;
	507
	508	old_cred = override_creds(cred);
	509
	510	/*
	511	* Clear any false indication of pending signals that might
	512	* be seen by the filesystem code called to write the core file.
	513	*/
	514	clear_thread_flag(TIF_SIGPENDING);
	515
	516	ispipe = format_corename(&cn, signr);
	517
	518	if (ispipe) {
	519	int dump_count;
	520	char **helper_argv;
	521
	522	if (ispipe < 0) {
	523	printk(KERN_WARNING "format_corename failed\n");
	524	printk(KERN_WARNING "Aborting core\n");
	525	goto fail_corename;
	526	}
	527
	528	if (cprm.limit == 1) {
	529	/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
	530	*
	531	* Normally core limits are irrelevant to pipes, since
	532	* we're not writing to the file system, but we use
	533	* cprm.limit of 1 here as a speacial value, this is a
	534	* consistent way to catch recursive crashes.
	535	* We can still crash if the core_pattern binary sets
	536	* RLIM_CORE = !1, but it runs as root, and can do
	537	* lots of stupid things.
	538	*
	539	* Note that we use task_tgid_vnr here to grab the pid
	540	* of the process group leader. That way we get the
	541	* right pid if a thread in a multi-threaded
	542	* core_pattern process dies.
	543	*/
	544	printk(KERN_WARNING
	545	"Process %d(%s) has RLIMIT_CORE set to 1\n",
	546	task_tgid_vnr(current), current->comm);
	547	printk(KERN_WARNING "Aborting core\n");
	548	goto fail_unlock;
	549	}
	550	cprm.limit = RLIM_INFINITY;
	551
	552	dump_count = atomic_inc_return(&core_dump_count);
	553	if (core_pipe_limit && (core_pipe_limit < dump_count)) {
	554	printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
	555	task_tgid_vnr(current), current->comm);
	556	printk(KERN_WARNING "Skipping core dump\n");
	557	goto fail_dropcount;
	558	}
	559
	560	helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
	561	if (!helper_argv) {
	562	printk(KERN_WARNING "%s failed to allocate memory\n",
	563	__func__);
	564	goto fail_dropcount;
	565	}
	566
	567	retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
	568	NULL, UMH_WAIT_EXEC, umh_pipe_setup,
	569	NULL, &cprm);
	570	argv_free(helper_argv);
	571	if (retval) {
	572	printk(KERN_INFO "Core dump to %s pipe failed\n",
	573	cn.corename);
	574	goto close_fail;
	575	}
	576	} else {
	577	struct inode *inode;
	578
	579	if (cprm.limit < binfmt->min_coredump)
	580	goto fail_unlock;
	581
	582	if (need_nonrelative && cn.corename[0] != '/') {
	583	printk(KERN_WARNING "Pid %d(%s) can only dump core "\
	584	"to fully qualified path!\n",
	585	task_tgid_vnr(current), current->comm);
	586	printk(KERN_WARNING "Skipping core dump\n");
	587	goto fail_unlock;
	588	}
	589
	590	cprm.file = filp_open(cn.corename,
	591	O_CREAT \| 2 \| O_NOFOLLOW \| O_LARGEFILE \| flag,
	592	0600);
	593	if (IS_ERR(cprm.file))
	594	goto fail_unlock;
	595
	596	inode = cprm.file->f_path.dentry->d_inode;
	597	if (inode->i_nlink > 1)
	598	goto close_fail;
	599	if (d_unhashed(cprm.file->f_path.dentry))
	600	goto close_fail;
	601	/*
	602	* AK: actually i see no reason to not allow this for named
	603	* pipes etc, but keep the previous behaviour for now.
	604	*/
	605	if (!S_ISREG(inode->i_mode))
	606	goto close_fail;
	607	/*
	608	* Dont allow local users get cute and trick others to coredump
	609	* into their pre-created files.
	610	*/
	611	if (!uid_eq(inode->i_uid, current_fsuid()))
	612	goto close_fail;
	613	if (!cprm.file->f_op \|\| !cprm.file->f_op->write)
	614	goto close_fail;
	615	if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
	616	goto close_fail;
	617	}
	618
	619	/* get us an unshared descriptor table; almost always a no-op */
	620	retval = unshare_files(&displaced);
	621	if (retval)
	622	goto close_fail;
	623	if (displaced)
	624	put_files_struct(displaced);
	625	retval = binfmt->core_dump(&cprm);
	626	if (retval)
	627	current->signal->group_exit_code \|= 0x80;
	628
	629	if (ispipe && core_pipe_limit)
	630	wait_for_dump_helpers(cprm.file);
	631	close_fail:
	632	if (cprm.file)
	633	filp_close(cprm.file, NULL);
	634	fail_dropcount:
	635	if (ispipe)
	636	atomic_dec(&core_dump_count);
	637	fail_unlock:
	638	kfree(cn.corename);
	639	fail_corename:
	640	coredump_finish(mm);
	641	revert_creds(old_cred);
	642	fail_creds:
	643	put_cred(cred);
	644	fail:
	645	return;
	646	}
	647
	648	/*
	649	* Core dumping helper functions. These are the only things you should
	650	* do on a core-file: use only these functions to write out all the
	651	* necessary info.
	652	*/
	653	int dump_write(struct file file, const void addr, int nr)
	654	{
	655	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
	656	}
	657	EXPORT_SYMBOL(dump_write);
	658
	659	int dump_seek(struct file *file, loff_t off)
	660	{
	661	int ret = 1;
	662
	663	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
	664	if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
	665	return 0;
	666	} else {
	667	char buf = (char )get_zeroed_page(GFP_KERNEL);
	668
	669	if (!buf)
	670	return 0;
	671	while (off > 0) {
	672	unsigned long n = off;
	673
	674	if (n > PAGE_SIZE)
	675	n = PAGE_SIZE;
	676	if (!dump_write(file, buf, n)) {
	677	ret = 0;
	678	break;
	679	}
	680	off -= n;
	681	}
	682	free_page((unsigned long)buf);
	683	}
	684	return ret;
	685	}
	686	EXPORT_SYMBOL(dump_seek);