author     Linus Torvalds <torvalds@linux-foundation.org>   2014-12-10 21:34:42 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-12-10 21:34:42 -0500
commit     b6da0076bab5a12afb19312ffee41c95490af2a0 (patch)
tree       52a5675b9c2ff95d88b981d5b9a3822f6073c112 /kernel
parent     cbfe0de303a55ed96d8831c2d5f56f8131cd6612 (diff)
parent     a53b831549141aa060a8b54b76e3a42870d74cc0 (diff)
Merge branch 'akpm' (patchbomb from Andrew)
Merge first patchbomb from Andrew Morton:
- a few minor cifs fixes
- dma-debug updates
- ocfs2
- slab
- about half of MM
- procfs
- kernel/exit.c
- panic.c tweaks
- printk updates
- lib/ updates
- checkpatch updates
- fs/binfmt updates
- the drivers/rtc tree
- nilfs
- kmod fixes
- more kernel/exit.c
- various other misc tweaks and fixes
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (190 commits)
exit: pidns: fix/update the comments in zap_pid_ns_processes()
exit: pidns: alloc_pid() leaks pid_namespace if child_reaper is exiting
exit: exit_notify: re-use "dead" list to autoreap current
exit: reparent: call forget_original_parent() under tasklist_lock
exit: reparent: avoid find_new_reaper() if no children
exit: reparent: introduce find_alive_thread()
exit: reparent: introduce find_child_reaper()
exit: reparent: document the ->has_child_subreaper checks
exit: reparent: s/while_each_thread/for_each_thread/ in find_new_reaper()
exit: reparent: fix the cross-namespace PR_SET_CHILD_SUBREAPER reparenting
exit: reparent: fix the dead-parent PR_SET_CHILD_SUBREAPER reparenting
exit: proc: don't try to flush /proc/tgid/task/tgid
exit: release_task: fix the comment about group leader accounting
exit: wait: drop tasklist_lock before psig->c* accounting
exit: wait: don't use zombie->real_parent
exit: wait: cleanup the ptrace_reparented() checks
usermodehelper: kill the kmod_thread_locker logic
usermodehelper: don't use CLONE_VFORK for ____call_usermodehelper()
fs/hfs/catalog.c: fix comparison bug in hfs_cat_keycmp
nilfs2: fix the nilfs_iget() vs. nilfs_new_inode() races
...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile        |   1
-rw-r--r--  kernel/exit.c          | 245
-rw-r--r--  kernel/kmod.c          |  43
-rw-r--r--  kernel/panic.c         |  13
-rw-r--r--  kernel/pid.c           |   2
-rw-r--r--  kernel/pid_namespace.c |  28
-rw-r--r--  kernel/printk/printk.c |  49
-rw-r--r--  kernel/ptrace.c        |  23
-rw-r--r--  kernel/res_counter.c   | 211
-rw-r--r--  kernel/sched/core.c    |   4
-rw-r--r--  kernel/sysctl.c        |   9
-rw-r--r--  kernel/sysctl_binary.c |   1
12 files changed, 205 insertions, 424 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 17ea6d4a9a24..a59481a3fa6c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_UTS_NS) += utsname.o | |||
57 | obj-$(CONFIG_USER_NS) += user_namespace.o | 57 | obj-$(CONFIG_USER_NS) += user_namespace.o |
58 | obj-$(CONFIG_PID_NS) += pid_namespace.o | 58 | obj-$(CONFIG_PID_NS) += pid_namespace.o |
59 | obj-$(CONFIG_IKCONFIG) += configs.o | 59 | obj-$(CONFIG_IKCONFIG) += configs.o |
60 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | ||
61 | obj-$(CONFIG_SMP) += stop_machine.o | 60 | obj-$(CONFIG_SMP) += stop_machine.o |
62 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 61 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
63 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 62 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
diff --git a/kernel/exit.c b/kernel/exit.c
index 232c4bc8bcc9..8714e5ded8b4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,13 +118,10 @@ static void __exit_signal(struct task_struct *tsk) | |||
118 | } | 118 | } |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * Accumulate here the counters for all threads but the group leader | 121 | * Accumulate here the counters for all threads as they die. We could |
122 | * as they die, so they can be added into the process-wide totals | 122 | * skip the group leader because it is the last user of signal_struct, |
123 | * when those are taken. The group leader stays around as a zombie as | 123 | * but we want to avoid the race with thread_group_cputime() which can |
124 | * long as there are other threads. When it gets reaped, the exit.c | 124 | * see the empty ->thread_head list. |
125 | * code will add its counts into these totals. We won't ever get here | ||
126 | * for the group leader, since it will have been the last reference on | ||
127 | * the signal_struct. | ||
128 | */ | 125 | */ |
129 | task_cputime(tsk, &utime, &stime); | 126 | task_cputime(tsk, &utime, &stime); |
130 | write_seqlock(&sig->stats_lock); | 127 | write_seqlock(&sig->stats_lock); |
@@ -462,6 +459,44 @@ static void exit_mm(struct task_struct *tsk) | |||
462 | clear_thread_flag(TIF_MEMDIE); | 459 | clear_thread_flag(TIF_MEMDIE); |
463 | } | 460 | } |
464 | 461 | ||
462 | static struct task_struct *find_alive_thread(struct task_struct *p) | ||
463 | { | ||
464 | struct task_struct *t; | ||
465 | |||
466 | for_each_thread(p, t) { | ||
467 | if (!(t->flags & PF_EXITING)) | ||
468 | return t; | ||
469 | } | ||
470 | return NULL; | ||
471 | } | ||
472 | |||
473 | static struct task_struct *find_child_reaper(struct task_struct *father) | ||
474 | __releases(&tasklist_lock) | ||
475 | __acquires(&tasklist_lock) | ||
476 | { | ||
477 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | ||
478 | struct task_struct *reaper = pid_ns->child_reaper; | ||
479 | |||
480 | if (likely(reaper != father)) | ||
481 | return reaper; | ||
482 | |||
483 | reaper = find_alive_thread(father); | ||
484 | if (reaper) { | ||
485 | pid_ns->child_reaper = reaper; | ||
486 | return reaper; | ||
487 | } | ||
488 | |||
489 | write_unlock_irq(&tasklist_lock); | ||
490 | if (unlikely(pid_ns == &init_pid_ns)) { | ||
491 | panic("Attempted to kill init! exitcode=0x%08x\n", | ||
492 | father->signal->group_exit_code ?: father->exit_code); | ||
493 | } | ||
494 | zap_pid_ns_processes(pid_ns); | ||
495 | write_lock_irq(&tasklist_lock); | ||
496 | |||
497 | return father; | ||
498 | } | ||
499 | |||
465 | /* | 500 | /* |
466 | * When we die, we re-parent all our children, and try to: | 501 | * When we die, we re-parent all our children, and try to: |
467 | * 1. give them to another thread in our thread group, if such a member exists | 502 | * 1. give them to another thread in our thread group, if such a member exists |
@@ -469,58 +504,36 @@ static void exit_mm(struct task_struct *tsk) | |||
469 | * child_subreaper for its children (like a service manager) | 504 | * child_subreaper for its children (like a service manager) |
470 | * 3. give it to the init process (PID 1) in our pid namespace | 505 | * 3. give it to the init process (PID 1) in our pid namespace |
471 | */ | 506 | */ |
472 | static struct task_struct *find_new_reaper(struct task_struct *father) | 507 | static struct task_struct *find_new_reaper(struct task_struct *father, |
473 | __releases(&tasklist_lock) | 508 | struct task_struct *child_reaper) |
474 | __acquires(&tasklist_lock) | ||
475 | { | 509 | { |
476 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 510 | struct task_struct *thread, *reaper; |
477 | struct task_struct *thread; | ||
478 | 511 | ||
479 | thread = father; | 512 | thread = find_alive_thread(father); |
480 | while_each_thread(father, thread) { | 513 | if (thread) |
481 | if (thread->flags & PF_EXITING) | ||
482 | continue; | ||
483 | if (unlikely(pid_ns->child_reaper == father)) | ||
484 | pid_ns->child_reaper = thread; | ||
485 | return thread; | 514 | return thread; |
486 | } | ||
487 | |||
488 | if (unlikely(pid_ns->child_reaper == father)) { | ||
489 | write_unlock_irq(&tasklist_lock); | ||
490 | if (unlikely(pid_ns == &init_pid_ns)) { | ||
491 | panic("Attempted to kill init! exitcode=0x%08x\n", | ||
492 | father->signal->group_exit_code ?: | ||
493 | father->exit_code); | ||
494 | } | ||
495 | |||
496 | zap_pid_ns_processes(pid_ns); | ||
497 | write_lock_irq(&tasklist_lock); | ||
498 | } else if (father->signal->has_child_subreaper) { | ||
499 | struct task_struct *reaper; | ||
500 | 515 | ||
516 | if (father->signal->has_child_subreaper) { | ||
501 | /* | 517 | /* |
502 | * Find the first ancestor marked as child_subreaper. | 518 | * Find the first ->is_child_subreaper ancestor in our pid_ns. |
503 | * Note that the code below checks same_thread_group(reaper, | 519 | * We start from father to ensure we can not look into another |
504 | * pid_ns->child_reaper). This is what we need to DTRT in a | 520 | * namespace, this is safe because all its threads are dead. |
505 | * PID namespace. However we still need the check above, see | ||
506 | * http://marc.info/?l=linux-kernel&m=131385460420380 | ||
507 | */ | 521 | */ |
508 | for (reaper = father->real_parent; | 522 | for (reaper = father; |
509 | reaper != &init_task; | 523 | !same_thread_group(reaper, child_reaper); |
510 | reaper = reaper->real_parent) { | 524 | reaper = reaper->real_parent) { |
511 | if (same_thread_group(reaper, pid_ns->child_reaper)) | 525 | /* call_usermodehelper() descendants need this check */ |
526 | if (reaper == &init_task) | ||
512 | break; | 527 | break; |
513 | if (!reaper->signal->is_child_subreaper) | 528 | if (!reaper->signal->is_child_subreaper) |
514 | continue; | 529 | continue; |
515 | thread = reaper; | 530 | thread = find_alive_thread(reaper); |
516 | do { | 531 | if (thread) |
517 | if (!(thread->flags & PF_EXITING)) | 532 | return thread; |
518 | return reaper; | ||
519 | } while_each_thread(reaper, thread); | ||
520 | } | 533 | } |
521 | } | 534 | } |
522 | 535 | ||
523 | return pid_ns->child_reaper; | 536 | return child_reaper; |
524 | } | 537 | } |
525 | 538 | ||
526 | /* | 539 | /* |
@@ -529,15 +542,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
529 | static void reparent_leader(struct task_struct *father, struct task_struct *p, | 542 | static void reparent_leader(struct task_struct *father, struct task_struct *p, |
530 | struct list_head *dead) | 543 | struct list_head *dead) |
531 | { | 544 | { |
532 | list_move_tail(&p->sibling, &p->real_parent->children); | 545 | if (unlikely(p->exit_state == EXIT_DEAD)) |
533 | |||
534 | if (p->exit_state == EXIT_DEAD) | ||
535 | return; | ||
536 | /* | ||
537 | * If this is a threaded reparent there is no need to | ||
538 | * notify anyone anything has happened. | ||
539 | */ | ||
540 | if (same_thread_group(p->real_parent, father)) | ||
541 | return; | 546 | return; |
542 | 547 | ||
543 | /* We don't want people slaying init. */ | 548 | /* We don't want people slaying init. */ |
@@ -548,49 +553,53 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
548 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 553 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
549 | if (do_notify_parent(p, p->exit_signal)) { | 554 | if (do_notify_parent(p, p->exit_signal)) { |
550 | p->exit_state = EXIT_DEAD; | 555 | p->exit_state = EXIT_DEAD; |
551 | list_move_tail(&p->sibling, dead); | 556 | list_add(&p->ptrace_entry, dead); |
552 | } | 557 | } |
553 | } | 558 | } |
554 | 559 | ||
555 | kill_orphaned_pgrp(p, father); | 560 | kill_orphaned_pgrp(p, father); |
556 | } | 561 | } |
557 | 562 | ||
558 | static void forget_original_parent(struct task_struct *father) | 563 | /* |
564 | * This does two things: | ||
565 | * | ||
566 | * A. Make init inherit all the child processes | ||
567 | * B. Check to see if any process groups have become orphaned | ||
568 | * as a result of our exiting, and if they have any stopped | ||
569 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
570 | */ | ||
571 | static void forget_original_parent(struct task_struct *father, | ||
572 | struct list_head *dead) | ||
559 | { | 573 | { |
560 | struct task_struct *p, *n, *reaper; | 574 | struct task_struct *p, *t, *reaper; |
561 | LIST_HEAD(dead_children); | ||
562 | 575 | ||
563 | write_lock_irq(&tasklist_lock); | 576 | if (unlikely(!list_empty(&father->ptraced))) |
564 | /* | 577 | exit_ptrace(father, dead); |
565 | * Note that exit_ptrace() and find_new_reaper() might | ||
566 | * drop tasklist_lock and reacquire it. | ||
567 | */ | ||
568 | exit_ptrace(father); | ||
569 | reaper = find_new_reaper(father); | ||
570 | 578 | ||
571 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 579 | /* Can drop and reacquire tasklist_lock */ |
572 | struct task_struct *t = p; | 580 | reaper = find_child_reaper(father); |
581 | if (list_empty(&father->children)) | ||
582 | return; | ||
573 | 583 | ||
574 | do { | 584 | reaper = find_new_reaper(father, reaper); |
585 | list_for_each_entry(p, &father->children, sibling) { | ||
586 | for_each_thread(p, t) { | ||
575 | t->real_parent = reaper; | 587 | t->real_parent = reaper; |
576 | if (t->parent == father) { | 588 | BUG_ON((!t->ptrace) != (t->parent == father)); |
577 | BUG_ON(t->ptrace); | 589 | if (likely(!t->ptrace)) |
578 | t->parent = t->real_parent; | 590 | t->parent = t->real_parent; |
579 | } | ||
580 | if (t->pdeath_signal) | 591 | if (t->pdeath_signal) |
581 | group_send_sig_info(t->pdeath_signal, | 592 | group_send_sig_info(t->pdeath_signal, |
582 | SEND_SIG_NOINFO, t); | 593 | SEND_SIG_NOINFO, t); |
583 | } while_each_thread(p, t); | 594 | } |
584 | reparent_leader(father, p, &dead_children); | 595 | /* |
585 | } | 596 | * If this is a threaded reparent there is no need to |
586 | write_unlock_irq(&tasklist_lock); | 597 | * notify anyone anything has happened. |
587 | 598 | */ | |
588 | BUG_ON(!list_empty(&father->children)); | 599 | if (!same_thread_group(reaper, father)) |
589 | 600 | reparent_leader(father, p, dead); | |
590 | list_for_each_entry_safe(p, n, &dead_children, sibling) { | ||
591 | list_del_init(&p->sibling); | ||
592 | release_task(p); | ||
593 | } | 601 | } |
602 | list_splice_tail_init(&father->children, &reaper->children); | ||
594 | } | 603 | } |
595 | 604 | ||
596 | /* | 605 | /* |
@@ -600,18 +609,12 @@ static void forget_original_parent(struct task_struct *father) | |||
600 | static void exit_notify(struct task_struct *tsk, int group_dead) | 609 | static void exit_notify(struct task_struct *tsk, int group_dead) |
601 | { | 610 | { |
602 | bool autoreap; | 611 | bool autoreap; |
603 | 612 | struct task_struct *p, *n; | |
604 | /* | 613 | LIST_HEAD(dead); |
605 | * This does two things: | ||
606 | * | ||
607 | * A. Make init inherit all the child processes | ||
608 | * B. Check to see if any process groups have become orphaned | ||
609 | * as a result of our exiting, and if they have any stopped | ||
610 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
611 | */ | ||
612 | forget_original_parent(tsk); | ||
613 | 614 | ||
614 | write_lock_irq(&tasklist_lock); | 615 | write_lock_irq(&tasklist_lock); |
616 | forget_original_parent(tsk, &dead); | ||
617 | |||
615 | if (group_dead) | 618 | if (group_dead) |
616 | kill_orphaned_pgrp(tsk->group_leader, NULL); | 619 | kill_orphaned_pgrp(tsk->group_leader, NULL); |
617 | 620 | ||
@@ -629,15 +632,18 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
629 | } | 632 | } |
630 | 633 | ||
631 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; | 634 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; |
635 | if (tsk->exit_state == EXIT_DEAD) | ||
636 | list_add(&tsk->ptrace_entry, &dead); | ||
632 | 637 | ||
633 | /* mt-exec, de_thread() is waiting for group leader */ | 638 | /* mt-exec, de_thread() is waiting for group leader */ |
634 | if (unlikely(tsk->signal->notify_count < 0)) | 639 | if (unlikely(tsk->signal->notify_count < 0)) |
635 | wake_up_process(tsk->signal->group_exit_task); | 640 | wake_up_process(tsk->signal->group_exit_task); |
636 | write_unlock_irq(&tasklist_lock); | 641 | write_unlock_irq(&tasklist_lock); |
637 | 642 | ||
638 | /* If the process is dead, release it - nobody will wait for it */ | 643 | list_for_each_entry_safe(p, n, &dead, ptrace_entry) { |
639 | if (autoreap) | 644 | list_del_init(&p->ptrace_entry); |
640 | release_task(tsk); | 645 | release_task(p); |
646 | } | ||
641 | } | 647 | } |
642 | 648 | ||
643 | #ifdef CONFIG_DEBUG_STACK_USAGE | 649 | #ifdef CONFIG_DEBUG_STACK_USAGE |
@@ -982,8 +988,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, | |||
982 | */ | 988 | */ |
983 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | 989 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) |
984 | { | 990 | { |
985 | unsigned long state; | 991 | int state, retval, status; |
986 | int retval, status, traced; | ||
987 | pid_t pid = task_pid_vnr(p); | 992 | pid_t pid = task_pid_vnr(p); |
988 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); | 993 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
989 | struct siginfo __user *infop; | 994 | struct siginfo __user *infop; |
@@ -1008,21 +1013,25 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1008 | } | 1013 | } |
1009 | return wait_noreap_copyout(wo, p, pid, uid, why, status); | 1014 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1010 | } | 1015 | } |
1011 | |||
1012 | traced = ptrace_reparented(p); | ||
1013 | /* | 1016 | /* |
1014 | * Move the task's state to DEAD/TRACE, only one thread can do this. | 1017 | * Move the task's state to DEAD/TRACE, only one thread can do this. |
1015 | */ | 1018 | */ |
1016 | state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD; | 1019 | state = (ptrace_reparented(p) && thread_group_leader(p)) ? |
1020 | EXIT_TRACE : EXIT_DEAD; | ||
1017 | if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) | 1021 | if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) |
1018 | return 0; | 1022 | return 0; |
1019 | /* | 1023 | /* |
1020 | * It can be ptraced but not reparented, check | 1024 | * We own this thread, nobody else can reap it. |
1021 | * thread_group_leader() to filter out sub-threads. | ||
1022 | */ | 1025 | */ |
1023 | if (likely(!traced) && thread_group_leader(p)) { | 1026 | read_unlock(&tasklist_lock); |
1024 | struct signal_struct *psig; | 1027 | sched_annotate_sleep(); |
1025 | struct signal_struct *sig; | 1028 | |
1029 | /* | ||
1030 | * Check thread_group_leader() to exclude the traced sub-threads. | ||
1031 | */ | ||
1032 | if (state == EXIT_DEAD && thread_group_leader(p)) { | ||
1033 | struct signal_struct *sig = p->signal; | ||
1034 | struct signal_struct *psig = current->signal; | ||
1026 | unsigned long maxrss; | 1035 | unsigned long maxrss; |
1027 | cputime_t tgutime, tgstime; | 1036 | cputime_t tgutime, tgstime; |
1028 | 1037 | ||
@@ -1034,21 +1043,20 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1034 | * accumulate in the parent's signal_struct c* fields. | 1043 | * accumulate in the parent's signal_struct c* fields. |
1035 | * | 1044 | * |
1036 | * We don't bother to take a lock here to protect these | 1045 | * We don't bother to take a lock here to protect these |
1037 | * p->signal fields, because they are only touched by | 1046 | * p->signal fields because the whole thread group is dead |
1038 | * __exit_signal, which runs with tasklist_lock | 1047 | * and nobody can change them. |
1039 | * write-locked anyway, and so is excluded here. We do | 1048 | * |
1040 | * need to protect the access to parent->signal fields, | 1049 | * psig->stats_lock also protects us from our sub-theads |
1041 | * as other threads in the parent group can be right | 1050 | * which can reap other children at the same time. Until |
1042 | * here reaping other children at the same time. | 1051 | * we change k_getrusage()-like users to rely on this lock |
1052 | * we have to take ->siglock as well. | ||
1043 | * | 1053 | * |
1044 | * We use thread_group_cputime_adjusted() to get times for | 1054 | * We use thread_group_cputime_adjusted() to get times for |
1045 | * the thread group, which consolidates times for all threads | 1055 | * the thread group, which consolidates times for all threads |
1046 | * in the group including the group leader. | 1056 | * in the group including the group leader. |
1047 | */ | 1057 | */ |
1048 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); | 1058 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); |
1049 | spin_lock_irq(&p->real_parent->sighand->siglock); | 1059 | spin_lock_irq(¤t->sighand->siglock); |
1050 | psig = p->real_parent->signal; | ||
1051 | sig = p->signal; | ||
1052 | write_seqlock(&psig->stats_lock); | 1060 | write_seqlock(&psig->stats_lock); |
1053 | psig->cutime += tgutime + sig->cutime; | 1061 | psig->cutime += tgutime + sig->cutime; |
1054 | psig->cstime += tgstime + sig->cstime; | 1062 | psig->cstime += tgstime + sig->cstime; |
@@ -1073,16 +1081,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1073 | task_io_accounting_add(&psig->ioac, &p->ioac); | 1081 | task_io_accounting_add(&psig->ioac, &p->ioac); |
1074 | task_io_accounting_add(&psig->ioac, &sig->ioac); | 1082 | task_io_accounting_add(&psig->ioac, &sig->ioac); |
1075 | write_sequnlock(&psig->stats_lock); | 1083 | write_sequnlock(&psig->stats_lock); |
1076 | spin_unlock_irq(&p->real_parent->sighand->siglock); | 1084 | spin_unlock_irq(¤t->sighand->siglock); |
1077 | } | 1085 | } |
1078 | 1086 | ||
1079 | /* | ||
1080 | * Now we are sure this task is interesting, and no other | ||
1081 | * thread can reap it because we its state == DEAD/TRACE. | ||
1082 | */ | ||
1083 | read_unlock(&tasklist_lock); | ||
1084 | sched_annotate_sleep(); | ||
1085 | |||
1086 | retval = wo->wo_rusage | 1087 | retval = wo->wo_rusage |
1087 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1088 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1088 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1089 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 80f7a6d00519..2777f40a9c7b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -47,13 +47,6 @@ extern int max_threads; | |||
47 | 47 | ||
48 | static struct workqueue_struct *khelper_wq; | 48 | static struct workqueue_struct *khelper_wq; |
49 | 49 | ||
50 | /* | ||
51 | * kmod_thread_locker is used for deadlock avoidance. There is no explicit | ||
52 | * locking to protect this global - it is private to the singleton khelper | ||
53 | * thread and should only ever be modified by that thread. | ||
54 | */ | ||
55 | static const struct task_struct *kmod_thread_locker; | ||
56 | |||
57 | #define CAP_BSET (void *)1 | 50 | #define CAP_BSET (void *)1 |
58 | #define CAP_PI (void *)2 | 51 | #define CAP_PI (void *)2 |
59 | 52 | ||
@@ -223,7 +216,6 @@ static void umh_complete(struct subprocess_info *sub_info) | |||
223 | static int ____call_usermodehelper(void *data) | 216 | static int ____call_usermodehelper(void *data) |
224 | { | 217 | { |
225 | struct subprocess_info *sub_info = data; | 218 | struct subprocess_info *sub_info = data; |
226 | int wait = sub_info->wait & ~UMH_KILLABLE; | ||
227 | struct cred *new; | 219 | struct cred *new; |
228 | int retval; | 220 | int retval; |
229 | 221 | ||
@@ -267,20 +259,13 @@ static int ____call_usermodehelper(void *data) | |||
267 | out: | 259 | out: |
268 | sub_info->retval = retval; | 260 | sub_info->retval = retval; |
269 | /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */ | 261 | /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */ |
270 | if (wait != UMH_WAIT_PROC) | 262 | if (!(sub_info->wait & UMH_WAIT_PROC)) |
271 | umh_complete(sub_info); | 263 | umh_complete(sub_info); |
272 | if (!retval) | 264 | if (!retval) |
273 | return 0; | 265 | return 0; |
274 | do_exit(0); | 266 | do_exit(0); |
275 | } | 267 | } |
276 | 268 | ||
277 | static int call_helper(void *data) | ||
278 | { | ||
279 | /* Worker thread started blocking khelper thread. */ | ||
280 | kmod_thread_locker = current; | ||
281 | return ____call_usermodehelper(data); | ||
282 | } | ||
283 | |||
284 | /* Keventd can't block, but this (a child) can. */ | 269 | /* Keventd can't block, but this (a child) can. */ |
285 | static int wait_for_helper(void *data) | 270 | static int wait_for_helper(void *data) |
286 | { | 271 | { |
@@ -323,21 +308,14 @@ static void __call_usermodehelper(struct work_struct *work) | |||
323 | { | 308 | { |
324 | struct subprocess_info *sub_info = | 309 | struct subprocess_info *sub_info = |
325 | container_of(work, struct subprocess_info, work); | 310 | container_of(work, struct subprocess_info, work); |
326 | int wait = sub_info->wait & ~UMH_KILLABLE; | ||
327 | pid_t pid; | 311 | pid_t pid; |
328 | 312 | ||
329 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 313 | if (sub_info->wait & UMH_WAIT_PROC) |
330 | * successfully We need the data structures to stay around | ||
331 | * until that is done. */ | ||
332 | if (wait == UMH_WAIT_PROC) | ||
333 | pid = kernel_thread(wait_for_helper, sub_info, | 314 | pid = kernel_thread(wait_for_helper, sub_info, |
334 | CLONE_FS | CLONE_FILES | SIGCHLD); | 315 | CLONE_FS | CLONE_FILES | SIGCHLD); |
335 | else { | 316 | else |
336 | pid = kernel_thread(call_helper, sub_info, | 317 | pid = kernel_thread(____call_usermodehelper, sub_info, |
337 | CLONE_VFORK | SIGCHLD); | 318 | SIGCHLD); |
338 | /* Worker thread stopped blocking khelper thread. */ | ||
339 | kmod_thread_locker = NULL; | ||
340 | } | ||
341 | 319 | ||
342 | if (pid < 0) { | 320 | if (pid < 0) { |
343 | sub_info->retval = pid; | 321 | sub_info->retval = pid; |
@@ -571,17 +549,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | |||
571 | goto out; | 549 | goto out; |
572 | } | 550 | } |
573 | /* | 551 | /* |
574 | * Worker thread must not wait for khelper thread at below | ||
575 | * wait_for_completion() if the thread was created with CLONE_VFORK | ||
576 | * flag, for khelper thread is already waiting for the thread at | ||
577 | * wait_for_completion() in do_fork(). | ||
578 | */ | ||
579 | if (wait != UMH_NO_WAIT && current == kmod_thread_locker) { | ||
580 | retval = -EBUSY; | ||
581 | goto out; | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * Set the completion pointer only if there is a waiter. | 552 | * Set the completion pointer only if there is a waiter. |
586 | * This makes it possible to use umh_complete to free | 553 | * This makes it possible to use umh_complete to free |
587 | * the data structure in case of UMH_NO_WAIT. | 554 | * the data structure in case of UMH_NO_WAIT. |
diff --git a/kernel/panic.c b/kernel/panic.c
index cf80672b7924..4d8d6f906dec 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -33,6 +33,7 @@ static int pause_on_oops; | |||
33 | static int pause_on_oops_flag; | 33 | static int pause_on_oops_flag; |
34 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
35 | static bool crash_kexec_post_notifiers; | 35 | static bool crash_kexec_post_notifiers; |
36 | int panic_on_warn __read_mostly; | ||
36 | 37 | ||
37 | int panic_timeout = CONFIG_PANIC_TIMEOUT; | 38 | int panic_timeout = CONFIG_PANIC_TIMEOUT; |
38 | EXPORT_SYMBOL_GPL(panic_timeout); | 39 | EXPORT_SYMBOL_GPL(panic_timeout); |
@@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller, | |||
428 | if (args) | 429 | if (args) |
429 | vprintk(args->fmt, args->args); | 430 | vprintk(args->fmt, args->args); |
430 | 431 | ||
432 | if (panic_on_warn) { | ||
433 | /* | ||
434 | * This thread may hit another WARN() in the panic path. | ||
435 | * Resetting this prevents additional WARN() from panicking the | ||
436 | * system on this thread. Other threads are blocked by the | ||
437 | * panic_mutex in panic(). | ||
438 | */ | ||
439 | panic_on_warn = 0; | ||
440 | panic("panic_on_warn set ...\n"); | ||
441 | } | ||
442 | |||
431 | print_modules(); | 443 | print_modules(); |
432 | dump_stack(); | 444 | dump_stack(); |
433 | print_oops_end_marker(); | 445 | print_oops_end_marker(); |
@@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
485 | 497 | ||
486 | core_param(panic, panic_timeout, int, 0644); | 498 | core_param(panic, panic_timeout, int, 0644); |
487 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 499 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
500 | core_param(panic_on_warn, panic_on_warn, int, 0644); | ||
488 | 501 | ||
489 | static int __init setup_crash_kexec_post_notifiers(char *s) | 502 | static int __init setup_crash_kexec_post_notifiers(char *s) |
490 | { | 503 | { |
diff --git a/kernel/pid.c b/kernel/pid.c
index 9b9a26698144..82430c858d69 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -341,6 +341,8 @@ out: | |||
341 | 341 | ||
342 | out_unlock: | 342 | out_unlock: |
343 | spin_unlock_irq(&pidmap_lock); | 343 | spin_unlock_irq(&pidmap_lock); |
344 | put_pid_ns(ns); | ||
345 | |||
344 | out_free: | 346 | out_free: |
345 | while (++i <= ns->level) | 347 | while (++i <= ns->level) |
346 | free_pidmap(pid->numbers + i); | 348 | free_pidmap(pid->numbers + i); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index db95d8eb761b..bc6d6a89b6e6 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -190,7 +190,11 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
190 | /* Don't allow any more processes into the pid namespace */ | 190 | /* Don't allow any more processes into the pid namespace */ |
191 | disable_pid_allocation(pid_ns); | 191 | disable_pid_allocation(pid_ns); |
192 | 192 | ||
193 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | 193 | /* |
194 | * Ignore SIGCHLD causing any terminated children to autoreap. | ||
195 | * This speeds up the namespace shutdown, plus see the comment | ||
196 | * below. | ||
197 | */ | ||
194 | spin_lock_irq(&me->sighand->siglock); | 198 | spin_lock_irq(&me->sighand->siglock); |
195 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | 199 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; |
196 | spin_unlock_irq(&me->sighand->siglock); | 200 | spin_unlock_irq(&me->sighand->siglock); |
@@ -223,15 +227,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
223 | } | 227 | } |
224 | read_unlock(&tasklist_lock); | 228 | read_unlock(&tasklist_lock); |
225 | 229 | ||
226 | /* Firstly reap the EXIT_ZOMBIE children we may have. */ | 230 | /* |
231 | * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD. | ||
232 | * sys_wait4() will also block until our children traced from the | ||
233 | * parent namespace are detached and become EXIT_DEAD. | ||
234 | */ | ||
227 | do { | 235 | do { |
228 | clear_thread_flag(TIF_SIGPENDING); | 236 | clear_thread_flag(TIF_SIGPENDING); |
229 | rc = sys_wait4(-1, NULL, __WALL, NULL); | 237 | rc = sys_wait4(-1, NULL, __WALL, NULL); |
230 | } while (rc != -ECHILD); | 238 | } while (rc != -ECHILD); |
231 | 239 | ||
232 | /* | 240 | /* |
233 | * sys_wait4() above can't reap the TASK_DEAD children. | 241 | * sys_wait4() above can't reap the EXIT_DEAD children but we do not |
234 | * Make sure they all go away, see free_pid(). | 242 | * really care, we could reparent them to the global init. We could |
243 | * exit and reap ->child_reaper even if it is not the last thread in | ||
244 | * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(), | ||
245 | * pid_ns can not go away until proc_kill_sb() drops the reference. | ||
246 | * | ||
247 | * But this ns can also have other tasks injected by setns()+fork(). | ||
248 | * Again, ignoring the user visible semantics we do not really need | ||
249 | * to wait until they are all reaped, but they can be reparented to | ||
250 | * us and thus we need to ensure that pid->child_reaper stays valid | ||
251 | * until they all go away. See free_pid()->wake_up_process(). | ||
252 | * | ||
253 | * We rely on ignored SIGCHLD, an injected zombie must be autoreaped | ||
254 | * if reparented. | ||
235 | */ | 255 | */ |
236 | for (;;) { | 256 | for (;;) { |
237 | set_current_state(TASK_UNINTERRUPTIBLE); | 257 | set_current_state(TASK_UNINTERRUPTIBLE); |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c8755e7e1dba..ea27c019655a 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -62,9 +62,6 @@ int console_printk[4] = { | |||
62 | CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ | 62 | CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ |
63 | }; | 63 | }; |
64 | 64 | ||
65 | /* Deferred messaged from sched code are marked by this special level */ | ||
66 | #define SCHED_MESSAGE_LOGLEVEL -2 | ||
67 | |||
68 | /* | 65 | /* |
69 | * Low level drivers may need that to know if they can schedule in | 66 | * Low level drivers may need that to know if they can schedule in |
70 | * their unblank() callback or not. So let's export it. | 67 | * their unblank() callback or not. So let's export it. |
@@ -1259,7 +1256,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
1259 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 1256 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
1260 | { | 1257 | { |
1261 | bool clear = false; | 1258 | bool clear = false; |
1262 | static int saved_console_loglevel = -1; | 1259 | static int saved_console_loglevel = LOGLEVEL_DEFAULT; |
1263 | int error; | 1260 | int error; |
1264 | 1261 | ||
1265 | error = check_syslog_permissions(type, from_file); | 1262 | error = check_syslog_permissions(type, from_file); |
@@ -1316,15 +1313,15 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
1316 | break; | 1313 | break; |
1317 | /* Disable logging to console */ | 1314 | /* Disable logging to console */ |
1318 | case SYSLOG_ACTION_CONSOLE_OFF: | 1315 | case SYSLOG_ACTION_CONSOLE_OFF: |
1319 | if (saved_console_loglevel == -1) | 1316 | if (saved_console_loglevel == LOGLEVEL_DEFAULT) |
1320 | saved_console_loglevel = console_loglevel; | 1317 | saved_console_loglevel = console_loglevel; |
1321 | console_loglevel = minimum_console_loglevel; | 1318 | console_loglevel = minimum_console_loglevel; |
1322 | break; | 1319 | break; |
1323 | /* Enable logging to console */ | 1320 | /* Enable logging to console */ |
1324 | case SYSLOG_ACTION_CONSOLE_ON: | 1321 | case SYSLOG_ACTION_CONSOLE_ON: |
1325 | if (saved_console_loglevel != -1) { | 1322 | if (saved_console_loglevel != LOGLEVEL_DEFAULT) { |
1326 | console_loglevel = saved_console_loglevel; | 1323 | console_loglevel = saved_console_loglevel; |
1327 | saved_console_loglevel = -1; | 1324 | saved_console_loglevel = LOGLEVEL_DEFAULT; |
1328 | } | 1325 | } |
1329 | break; | 1326 | break; |
1330 | /* Set level of messages printed to console */ | 1327 | /* Set level of messages printed to console */ |
@@ -1336,7 +1333,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
1336 | len = minimum_console_loglevel; | 1333 | len = minimum_console_loglevel; |
1337 | console_loglevel = len; | 1334 | console_loglevel = len; |
1338 | /* Implicitly re-enable logging to console */ | 1335 | /* Implicitly re-enable logging to console */ |
1339 | saved_console_loglevel = -1; | 1336 | saved_console_loglevel = LOGLEVEL_DEFAULT; |
1340 | error = 0; | 1337 | error = 0; |
1341 | break; | 1338 | break; |
1342 | /* Number of chars in the log buffer */ | 1339 | /* Number of chars in the log buffer */ |
@@ -1627,10 +1624,10 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1627 | int printed_len = 0; | 1624 | int printed_len = 0; |
1628 | bool in_sched = false; | 1625 | bool in_sched = false; |
1629 | /* cpu currently holding logbuf_lock in this function */ | 1626 | /* cpu currently holding logbuf_lock in this function */ |
1630 | static volatile unsigned int logbuf_cpu = UINT_MAX; | 1627 | static unsigned int logbuf_cpu = UINT_MAX; |
1631 | 1628 | ||
1632 | if (level == SCHED_MESSAGE_LOGLEVEL) { | 1629 | if (level == LOGLEVEL_SCHED) { |
1633 | level = -1; | 1630 | level = LOGLEVEL_DEFAULT; |
1634 | in_sched = true; | 1631 | in_sched = true; |
1635 | } | 1632 | } |
1636 | 1633 | ||
@@ -1695,8 +1692,9 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1695 | const char *end_of_header = printk_skip_level(text); | 1692 | const char *end_of_header = printk_skip_level(text); |
1696 | switch (kern_level) { | 1693 | switch (kern_level) { |
1697 | case '0' ... '7': | 1694 | case '0' ... '7': |
1698 | if (level == -1) | 1695 | if (level == LOGLEVEL_DEFAULT) |
1699 | level = kern_level - '0'; | 1696 | level = kern_level - '0'; |
1697 | /* fallthrough */ | ||
1700 | case 'd': /* KERN_DEFAULT */ | 1698 | case 'd': /* KERN_DEFAULT */ |
1701 | lflags |= LOG_PREFIX; | 1699 | lflags |= LOG_PREFIX; |
1702 | } | 1700 | } |
@@ -1710,7 +1708,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1710 | } | 1708 | } |
1711 | } | 1709 | } |
1712 | 1710 | ||
1713 | if (level == -1) | 1711 | if (level == LOGLEVEL_DEFAULT) |
1714 | level = default_message_loglevel; | 1712 | level = default_message_loglevel; |
1715 | 1713 | ||
1716 | if (dict) | 1714 | if (dict) |
@@ -1788,7 +1786,7 @@ EXPORT_SYMBOL(vprintk_emit); | |||
1788 | 1786 | ||
1789 | asmlinkage int vprintk(const char *fmt, va_list args) | 1787 | asmlinkage int vprintk(const char *fmt, va_list args) |
1790 | { | 1788 | { |
1791 | return vprintk_emit(0, -1, NULL, 0, fmt, args); | 1789 | return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); |
1792 | } | 1790 | } |
1793 | EXPORT_SYMBOL(vprintk); | 1791 | EXPORT_SYMBOL(vprintk); |
1794 | 1792 | ||
@@ -1842,7 +1840,7 @@ asmlinkage __visible int printk(const char *fmt, ...) | |||
1842 | } | 1840 | } |
1843 | #endif | 1841 | #endif |
1844 | va_start(args, fmt); | 1842 | va_start(args, fmt); |
1845 | r = vprintk_emit(0, -1, NULL, 0, fmt, args); | 1843 | r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); |
1846 | va_end(args); | 1844 | va_end(args); |
1847 | 1845 | ||
1848 | return r; | 1846 | return r; |
@@ -1881,23 +1879,20 @@ static size_t cont_print_text(char *text, size_t size) { return 0; } | |||
1881 | #ifdef CONFIG_EARLY_PRINTK | 1879 | #ifdef CONFIG_EARLY_PRINTK |
1882 | struct console *early_console; | 1880 | struct console *early_console; |
1883 | 1881 | ||
1884 | void early_vprintk(const char *fmt, va_list ap) | ||
1885 | { | ||
1886 | if (early_console) { | ||
1887 | char buf[512]; | ||
1888 | int n = vscnprintf(buf, sizeof(buf), fmt, ap); | ||
1889 | |||
1890 | early_console->write(early_console, buf, n); | ||
1891 | } | ||
1892 | } | ||
1893 | |||
1894 | asmlinkage __visible void early_printk(const char *fmt, ...) | 1882 | asmlinkage __visible void early_printk(const char *fmt, ...) |
1895 | { | 1883 | { |
1896 | va_list ap; | 1884 | va_list ap; |
1885 | char buf[512]; | ||
1886 | int n; | ||
1887 | |||
1888 | if (!early_console) | ||
1889 | return; | ||
1897 | 1890 | ||
1898 | va_start(ap, fmt); | 1891 | va_start(ap, fmt); |
1899 | early_vprintk(fmt, ap); | 1892 | n = vscnprintf(buf, sizeof(buf), fmt, ap); |
1900 | va_end(ap); | 1893 | va_end(ap); |
1894 | |||
1895 | early_console->write(early_console, buf, n); | ||
1901 | } | 1896 | } |
1902 | #endif | 1897 | #endif |
1903 | 1898 | ||
@@ -2634,7 +2629,7 @@ int printk_deferred(const char *fmt, ...) | |||
2634 | 2629 | ||
2635 | preempt_disable(); | 2630 | preempt_disable(); |
2636 | va_start(args, fmt); | 2631 | va_start(args, fmt); |
2637 | r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args); | 2632 | r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); |
2638 | va_end(args); | 2633 | va_end(args); |
2639 | 2634 | ||
2640 | __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); | 2635 | __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 54e75226c2c4..1eb9d90c3af9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -485,36 +485,19 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
485 | 485 | ||
486 | /* | 486 | /* |
487 | * Detach all tasks we were using ptrace on. Called with tasklist held | 487 | * Detach all tasks we were using ptrace on. Called with tasklist held |
488 | * for writing, and returns with it held too. But note it can release | 488 | * for writing. |
489 | * and reacquire the lock. | ||
490 | */ | 489 | */ |
491 | void exit_ptrace(struct task_struct *tracer) | 490 | void exit_ptrace(struct task_struct *tracer, struct list_head *dead) |
492 | __releases(&tasklist_lock) | ||
493 | __acquires(&tasklist_lock) | ||
494 | { | 491 | { |
495 | struct task_struct *p, *n; | 492 | struct task_struct *p, *n; |
496 | LIST_HEAD(ptrace_dead); | ||
497 | |||
498 | if (likely(list_empty(&tracer->ptraced))) | ||
499 | return; | ||
500 | 493 | ||
501 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { | 494 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { |
502 | if (unlikely(p->ptrace & PT_EXITKILL)) | 495 | if (unlikely(p->ptrace & PT_EXITKILL)) |
503 | send_sig_info(SIGKILL, SEND_SIG_FORCED, p); | 496 | send_sig_info(SIGKILL, SEND_SIG_FORCED, p); |
504 | 497 | ||
505 | if (__ptrace_detach(tracer, p)) | 498 | if (__ptrace_detach(tracer, p)) |
506 | list_add(&p->ptrace_entry, &ptrace_dead); | 499 | list_add(&p->ptrace_entry, dead); |
507 | } | ||
508 | |||
509 | write_unlock_irq(&tasklist_lock); | ||
510 | BUG_ON(!list_empty(&tracer->ptraced)); | ||
511 | |||
512 | list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { | ||
513 | list_del_init(&p->ptrace_entry); | ||
514 | release_task(p); | ||
515 | } | 500 | } |
516 | |||
517 | write_lock_irq(&tasklist_lock); | ||
518 | } | 501 | } |
519 | 502 | ||
520 | int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) | 503 | int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
deleted file mode 100644
index e791130f85a7..000000000000
--- a/kernel/res_counter.c
+++ /dev/null
@@ -1,211 +0,0 @@ | |||
1 | /* | ||
2 | * resource cgroups | ||
3 | * | ||
4 | * Copyright 2007 OpenVZ SWsoft Inc | ||
5 | * | ||
6 | * Author: Pavel Emelianov <xemul@openvz.org> | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/parser.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/res_counter.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/mm.h> | ||
16 | |||
17 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) | ||
18 | { | ||
19 | spin_lock_init(&counter->lock); | ||
20 | counter->limit = RES_COUNTER_MAX; | ||
21 | counter->soft_limit = RES_COUNTER_MAX; | ||
22 | counter->parent = parent; | ||
23 | } | ||
24 | |||
25 | static u64 res_counter_uncharge_locked(struct res_counter *counter, | ||
26 | unsigned long val) | ||
27 | { | ||
28 | if (WARN_ON(counter->usage < val)) | ||
29 | val = counter->usage; | ||
30 | |||
31 | counter->usage -= val; | ||
32 | return counter->usage; | ||
33 | } | ||
34 | |||
35 | static int res_counter_charge_locked(struct res_counter *counter, | ||
36 | unsigned long val, bool force) | ||
37 | { | ||
38 | int ret = 0; | ||
39 | |||
40 | if (counter->usage + val > counter->limit) { | ||
41 | counter->failcnt++; | ||
42 | ret = -ENOMEM; | ||
43 | if (!force) | ||
44 | return ret; | ||
45 | } | ||
46 | |||
47 | counter->usage += val; | ||
48 | if (counter->usage > counter->max_usage) | ||
49 | counter->max_usage = counter->usage; | ||
50 | return ret; | ||
51 | } | ||
52 | |||
53 | static int __res_counter_charge(struct res_counter *counter, unsigned long val, | ||
54 | struct res_counter **limit_fail_at, bool force) | ||
55 | { | ||
56 | int ret, r; | ||
57 | unsigned long flags; | ||
58 | struct res_counter *c, *u; | ||
59 | |||
60 | r = ret = 0; | ||
61 | *limit_fail_at = NULL; | ||
62 | local_irq_save(flags); | ||
63 | for (c = counter; c != NULL; c = c->parent) { | ||
64 | spin_lock(&c->lock); | ||
65 | r = res_counter_charge_locked(c, val, force); | ||
66 | spin_unlock(&c->lock); | ||
67 | if (r < 0 && !ret) { | ||
68 | ret = r; | ||
69 | *limit_fail_at = c; | ||
70 | if (!force) | ||
71 | break; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | if (ret < 0 && !force) { | ||
76 | for (u = counter; u != c; u = u->parent) { | ||
77 | spin_lock(&u->lock); | ||
78 | res_counter_uncharge_locked(u, val); | ||
79 | spin_unlock(&u->lock); | ||
80 | } | ||
81 | } | ||
82 | local_irq_restore(flags); | ||
83 | |||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | int res_counter_charge(struct res_counter *counter, unsigned long val, | ||
88 | struct res_counter **limit_fail_at) | ||
89 | { | ||
90 | return __res_counter_charge(counter, val, limit_fail_at, false); | ||
91 | } | ||
92 | |||
93 | int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, | ||
94 | struct res_counter **limit_fail_at) | ||
95 | { | ||
96 | return __res_counter_charge(counter, val, limit_fail_at, true); | ||
97 | } | ||
98 | |||
99 | u64 res_counter_uncharge_until(struct res_counter *counter, | ||
100 | struct res_counter *top, | ||
101 | unsigned long val) | ||
102 | { | ||
103 | unsigned long flags; | ||
104 | struct res_counter *c; | ||
105 | u64 ret = 0; | ||
106 | |||
107 | local_irq_save(flags); | ||
108 | for (c = counter; c != top; c = c->parent) { | ||
109 | u64 r; | ||
110 | spin_lock(&c->lock); | ||
111 | r = res_counter_uncharge_locked(c, val); | ||
112 | if (c == counter) | ||
113 | ret = r; | ||
114 | spin_unlock(&c->lock); | ||
115 | } | ||
116 | local_irq_restore(flags); | ||
117 | return ret; | ||
118 | } | ||
119 | |||
120 | u64 res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||
121 | { | ||
122 | return res_counter_uncharge_until(counter, NULL, val); | ||
123 | } | ||
124 | |||
125 | static inline unsigned long long * | ||
126 | res_counter_member(struct res_counter *counter, int member) | ||
127 | { | ||
128 | switch (member) { | ||
129 | case RES_USAGE: | ||
130 | return &counter->usage; | ||
131 | case RES_MAX_USAGE: | ||
132 | return &counter->max_usage; | ||
133 | case RES_LIMIT: | ||
134 | return &counter->limit; | ||
135 | case RES_FAILCNT: | ||
136 | return &counter->failcnt; | ||
137 | case RES_SOFT_LIMIT: | ||
138 | return &counter->soft_limit; | ||
139 | }; | ||
140 | |||
141 | BUG(); | ||
142 | return NULL; | ||
143 | } | ||
144 | |||
145 | ssize_t res_counter_read(struct res_counter *counter, int member, | ||
146 | const char __user *userbuf, size_t nbytes, loff_t *pos, | ||
147 | int (*read_strategy)(unsigned long long val, char *st_buf)) | ||
148 | { | ||
149 | unsigned long long *val; | ||
150 | char buf[64], *s; | ||
151 | |||
152 | s = buf; | ||
153 | val = res_counter_member(counter, member); | ||
154 | if (read_strategy) | ||
155 | s += read_strategy(*val, s); | ||
156 | else | ||
157 | s += sprintf(s, "%llu\n", *val); | ||
158 | return simple_read_from_buffer((void __user *)userbuf, nbytes, | ||
159 | pos, buf, s - buf); | ||
160 | } | ||
161 | |||
162 | #if BITS_PER_LONG == 32 | ||
163 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | u64 ret; | ||
167 | |||
168 | spin_lock_irqsave(&counter->lock, flags); | ||
169 | ret = *res_counter_member(counter, member); | ||
170 | spin_unlock_irqrestore(&counter->lock, flags); | ||
171 | |||
172 | return ret; | ||
173 | } | ||
174 | #else | ||
175 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
176 | { | ||
177 | return *res_counter_member(counter, member); | ||
178 | } | ||
179 | #endif | ||
180 | |||
181 | int res_counter_memparse_write_strategy(const char *buf, | ||
182 | unsigned long long *resp) | ||
183 | { | ||
184 | char *end; | ||
185 | unsigned long long res; | ||
186 | |||
187 | /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */ | ||
188 | if (*buf == '-') { | ||
189 | int rc = kstrtoull(buf + 1, 10, &res); | ||
190 | |||
191 | if (rc) | ||
192 | return rc; | ||
193 | if (res != 1) | ||
194 | return -EINVAL; | ||
195 | *resp = RES_COUNTER_MAX; | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | res = memparse(buf, &end); | ||
200 | if (*end != '\0') | ||
201 | return -EINVAL; | ||
202 | |||
203 | if (PAGE_ALIGN(res) >= res) | ||
204 | res = PAGE_ALIGN(res); | ||
205 | else | ||
206 | res = RES_COUNTER_MAX; | ||
207 | |||
208 | *resp = res; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bb398c0c5f08..b5797b78add6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4527,8 +4527,10 @@ void sched_show_task(struct task_struct *p) | |||
4527 | #ifdef CONFIG_DEBUG_STACK_USAGE | 4527 | #ifdef CONFIG_DEBUG_STACK_USAGE |
4528 | free = stack_not_used(p); | 4528 | free = stack_not_used(p); |
4529 | #endif | 4529 | #endif |
4530 | ppid = 0; | ||
4530 | rcu_read_lock(); | 4531 | rcu_read_lock(); |
4531 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); | 4532 | if (pid_alive(p)) |
4533 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); | ||
4532 | rcu_read_unlock(); | 4534 | rcu_read_unlock(); |
4533 | printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, | 4535 | printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
4534 | task_pid_nr(p), ppid, | 4536 | task_pid_nr(p), ppid, |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 15f2511a1b7c..7c54ff79afd7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = { | |||
1104 | .proc_handler = proc_dointvec, | 1104 | .proc_handler = proc_dointvec, |
1105 | }, | 1105 | }, |
1106 | #endif | 1106 | #endif |
1107 | { | ||
1108 | .procname = "panic_on_warn", | ||
1109 | .data = &panic_on_warn, | ||
1110 | .maxlen = sizeof(int), | ||
1111 | .mode = 0644, | ||
1112 | .proc_handler = proc_dointvec_minmax, | ||
1113 | .extra1 = &zero, | ||
1114 | .extra2 = &one, | ||
1115 | }, | ||
1107 | { } | 1116 | { } |
1108 | }; | 1117 | }; |
1109 | 1118 | ||
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 9a4f750a2963..7e7746a42a62 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = { | |||
137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, | 137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, |
138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, | 138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, |
139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, | 139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, |
140 | { CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" }, | ||
140 | {} | 141 | {} |
141 | }; | 142 | }; |
142 | 143 | ||