Diffstat (limited to 'kernel')
 kernel/auditsc.c              |   5
 kernel/events/callchain.c     |   2
 kernel/events/core.c          | 125
 kernel/events/hw_breakpoint.c |   4
 kernel/exit.c                 |  16
 kernel/fork.c                 |  87
 kernel/hung_task.c            |  11
 kernel/irq/autoprobe.c        |   4
 kernel/irq/chip.c             |  42
 kernel/irq/internals.h        |   2
 kernel/irq/manage.c           |  46
 kernel/kprobes.c              |  20
 kernel/params.c               |   3
 kernel/pid.c                  |   4
 kernel/power/power.h          |  24
 kernel/power/process.c        |  26
 kernel/power/snapshot.c       |   3
 kernel/power/user.c           |  15
 kernel/printk.c               |   6
 kernel/rcutorture.c           |   8
 kernel/relay.c                |  10
 kernel/res_counter.c          |  25
 kernel/sched/core.c           |  20
 kernel/sched/cpupri.c         |   3
 kernel/sched/fair.c           |  36
 kernel/sched/rt.c             |   5
 kernel/watchdog.c             |   2
 27 files changed, 414 insertions(+), 140 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index caaea6e944f8..af1de0f34eae 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1863,11 +1863,12 @@ void __audit_syscall_entry(int arch, int major,
 
 /**
  * audit_syscall_exit - deallocate audit context after a system call
- * @pt_regs: syscall registers
+ * @success: success value of the syscall
+ * @return_code: return value of the syscall
  *
  * Tear down after system call. If the audit context has been marked as
  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
- * filtering, or because some other part of the kernel write an audit
+ * filtering, or because some other part of the kernel wrote an audit
  * message), then write out the syscall information. In call cases,
  * free the names stored from getname().
  */
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 057e24b665cf..6581a040f399 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -115,8 +115,6 @@ int get_callchain_buffers(void)
 	}
 
 	err = alloc_callchain_buffers();
-	if (err)
-		release_callchain_buffers();
 exit:
 	mutex_unlock(&callchain_mutex);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a8f4ac001a00..1b5c081d8b9f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -815,7 +815,7 @@ static void update_event_times(struct perf_event *event)
 	 * here.
 	 */
 	if (is_cgroup_event(event))
-		run_end = perf_event_time(event);
+		run_end = perf_cgroup_event_time(event);
 	else if (ctx->is_active)
 		run_end = ctx->time;
 	else
@@ -2300,7 +2300,10 @@ do { \
 	return div64_u64(dividend, divisor);
 }
 
-static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period, sample_period;
@@ -2319,22 +2322,40 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	hwc->sample_period = sample_period;
 
 	if (local64_read(&hwc->period_left) > 8*sample_period) {
-		event->pmu->stop(event, PERF_EF_UPDATE);
+		if (disable)
+			event->pmu->stop(event, PERF_EF_UPDATE);
+
 		local64_set(&hwc->period_left, 0);
-		event->pmu->start(event, PERF_EF_RELOAD);
+
+		if (disable)
+			event->pmu->start(event, PERF_EF_RELOAD);
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+	perf_pmu_disable(ctx->pmu);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2365,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2374,30 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 * we have stopped the event so tell that
+		 * to perf_adjust_period() to avoid stopping it
+		 * twice.
+		 */
 		if (delta > 0)
-			perf_adjust_period(event, period, delta);
+			perf_adjust_period(event, period, delta, false);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	perf_pmu_enable(ctx->pmu);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2420,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2434,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
 	rotate_ctx(&cpuctx->ctx);
 	if (ctx)
 		rotate_ctx(ctx);
 
 	perf_event_sched_in(cpuctx, ctx, current);
-	}
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2463,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 				!(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4539,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4549,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
@@ -4534,7 +4571,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		hwc->freq_time_stamp = now;
 
 		if (delta > 0 && delta < 2*TICK_NSEC)
-			perf_adjust_period(event, delta, hwc->last_period);
+			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
 	/*
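For readers unfamiliar with the new throttling scheme above, here is a minimal, self-contained C sketch (illustrative only, plain userspace code, not the kernel implementation) of the lazy per-tick reset the patch relies on: the tick only bumps a sequence number, and each event's interrupt counter resets itself the first time it fires under a new sequence, instead of every counter being cleared on every tick.

#include <stdio.h>

static unsigned long tick_seq = 1;	/* stands in for perf_throttled_seq */

struct counter {
	unsigned long seq;		/* last tick_seq this counter saw */
	unsigned int interrupts;	/* samples taken in the current tick */
};

static void on_tick(void)
{
	tick_seq++;			/* no walk over all counters here */
}

static void on_sample(struct counter *c, unsigned int max_per_tick)
{
	if (c->seq != tick_seq) {	/* first sample in this tick */
		c->seq = tick_seq;
		c->interrupts = 1;
	} else if (++c->interrupts >= max_per_tick) {
		printf("would throttle\n");
	}
}

int main(void)
{
	struct counter c = { 0, 0 };
	int i;

	for (i = 0; i < 5; i++)
		on_sample(&c, 3);	/* 3rd and later samples would throttle */
	on_tick();
	on_sample(&c, 3);		/* counter lazily resets to 1 */
	printf("after tick: interrupts=%u\n", c.interrupts);
	return 0;
}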
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6f38bf..ee706ce44aa0 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -651,10 +651,10 @@ int __init init_hw_breakpoint(void)
 
 err_alloc:
 	for_each_possible_cpu(err_cpu) {
-		if (err_cpu == cpu)
-			break;
 		for (i = 0; i < TYPE_MAX; i++)
 			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+		if (err_cpu == cpu)
+			break;
 	}
 
 	return -ENOMEM;
diff --git a/kernel/exit.c b/kernel/exit.c
index 294b1709170d..4b4042f9bc6a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1038,6 +1038,22 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true.
+	 *   - There is race condition of mmap_sem (It is acquired by
+	 *     exit_mm()), and
+	 *   - SMI occurs before setting TASK_RUNINNG.
+	 *     (or hypervisor of virtual machine switches to other guest)
+	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+	 *
+	 * To avoid it, we have to wait for releasing tsk->pi_lock which
+	 * is held by try_to_wake_up()
+	 */
+	smp_mb();
+	raw_spin_unlock_wait(&tsk->pi_lock);
+
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..26a7a6707fa7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -647,6 +648,58 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+	struct mm_struct *mm;
+	int err;
+
+	err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	mm = get_task_mm(task);
+	if (mm && mm != current->mm &&
+			!ptrace_may_access(task, mode)) {
+		mmput(mm);
+		mm = ERR_PTR(-EACCES);
+	}
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return mm;
+}
+
+static void complete_vfork_done(struct task_struct *tsk)
+{
+	struct completion *vfork;
+
+	task_lock(tsk);
+	vfork = tsk->vfork_done;
+	if (likely(vfork)) {
+		tsk->vfork_done = NULL;
+		complete(vfork);
+	}
+	task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+				struct completion *vfork)
+{
+	int killed;
+
+	freezer_do_not_count();
+	killed = wait_for_completion_killable(vfork);
+	freezer_count();
+
+	if (killed) {
+		task_lock(child);
+		child->vfork_done = NULL;
+		task_unlock(child);
+	}
+
+	put_task_struct(child);
+	return killed;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
@@ -662,8 +715,6 @@ EXPORT_SYMBOL_GPL(get_task_mm);
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct completion *vfork_done = tsk->vfork_done;
-
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
 	if (unlikely(tsk->robust_list)) {
@@ -683,17 +734,15 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	/* notify parent sleeping on vfork() */
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
-	}
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
 	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble otherwise. Userland only wants this done for a sys_exit.
+	 * trouble, say, a killed vfork parent shouldn't touch this mm.
+	 * Userland only wants this done for a sys_exit.
 	 */
 	if (tsk->clear_child_tid) {
 		if (!(tsk->flags & PF_SIGNALED) &&
@@ -890,7 +939,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc, NULL);
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
@@ -915,8 +964,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	if (atomic_dec_and_test(&sighand->count))
+	if (atomic_dec_and_test(&sighand->count)) {
+		signalfd_cleanup(sighand);
 		kmem_cache_free(sighand_cachep, sighand);
+	}
 }
 
 
@@ -995,7 +1046,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
 	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
 	new_flags |= PF_FORKNOEXEC;
-	new_flags |= PF_STARTING;
 	p->flags = new_flags;
 }
 
@@ -1525,16 +1575,9 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
+			get_task_struct(p);
 		}
 
-		/*
-		 * We set PF_STARTING at creation in case tracing wants to
-		 * use this to distinguish a fully live task from one that
-		 * hasn't finished SIGSTOP raising yet. Now we clear it
-		 * and set the child going.
-		 */
-		p->flags &= ~PF_STARTING;
-
 		wake_up_new_task(p);
 
 		/* forking complete and child started to run, tell ptracer */
@@ -1542,10 +1585,8 @@ long do_fork(unsigned long clone_flags,
 			ptrace_event(trace, nr);
 
 		if (clone_flags & CLONE_VFORK) {
-			freezer_do_not_count();
-			wait_for_completion(&vfork);
-			freezer_count();
-			ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+			if (!wait_for_vfork_done(p, &vfork))
+				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
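For context on the new mm_access() helper introduced above, a hypothetical in-kernel caller might look like the sketch below (illustrative only; the function name dump_task_mm_size and its use are not part of this patch). mm_access() returns NULL when the task has no mm, an ERR_PTR() on permission or interruption failures, or a referenced mm that the caller must drop with mmput().

static int dump_task_mm_size(struct task_struct *task)
{
	struct mm_struct *mm;

	mm = mm_access(task, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);	/* e.g. -EINTR or -EACCES */
	if (!mm)
		return -ENOENT;		/* kernel thread or exited task */

	pr_info("%s: %lu pages mapped\n", task->comm, mm->total_vm);
	mmput(mm);
	return 0;
}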
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 2e48ec0c2e91..c21449f85a2a 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -119,15 +119,20 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
  * For preemptible RCU it is sufficient to call rcu_read_unlock in order
  * to exit the grace period. For classic RCU, a reschedule is required.
  */
-static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
 {
+	bool can_cont;
+
 	get_task_struct(g);
 	get_task_struct(t);
 	rcu_read_unlock();
 	cond_resched();
 	rcu_read_lock();
+	can_cont = pid_alive(g) && pid_alive(t);
 	put_task_struct(t);
 	put_task_struct(g);
+
+	return can_cont;
 }
 
 /*
@@ -154,9 +159,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			goto unlock;
 		if (!--batch_count) {
 			batch_count = HUNG_TASK_BATCHING;
-			rcu_lock_break(g, t);
-			/* Exit if t or g was unhashed during refresh. */
-			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+			if (!rcu_lock_break(g, t))
 				goto unlock;
 		}
 		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 342d8f44e401..0119b9d467ae 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
 			if (desc->irq_data.chip->irq_set_type)
 				desc->irq_data.chip->irq_set_type(&desc->irq_data,
 							 IRQ_TYPE_PROBE);
-			irq_startup(desc);
+			irq_startup(desc, false);
 		}
 		raw_spin_unlock_irq(&desc->lock);
 	}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
 		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && irq_settings_can_probe(desc)) {
 			desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
-			if (irq_startup(desc))
+			if (irq_startup(desc, false))
 				desc->istate |= IRQS_PENDING;
 		}
 		raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f7c543a801d9..fb7db75ee0c8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -157,19 +157,22 @@ static void irq_state_set_masked(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-int irq_startup(struct irq_desc *desc)
+int irq_startup(struct irq_desc *desc, bool resend)
 {
+	int ret = 0;
+
 	irq_state_clr_disabled(desc);
 	desc->depth = 0;
 
 	if (desc->irq_data.chip->irq_startup) {
-		int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
 		irq_state_clr_masked(desc);
-		return ret;
+	} else {
+		irq_enable(desc);
 	}
-
-	irq_enable(desc);
-	return 0;
+	if (resend)
+		check_irq_resend(desc, desc->irq_data.irq);
+	return ret;
 }
 
 void irq_shutdown(struct irq_desc *desc)
@@ -330,6 +333,24 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(handle_simple_irq);
 
+/*
+ * Called unconditionally from handle_level_irq() and only for oneshot
+ * interrupts from handle_fasteoi_irq()
+ */
+static void cond_unmask_irq(struct irq_desc *desc)
+{
+	/*
+	 * We need to unmask in the following cases:
+	 * - Standard level irq (IRQF_ONESHOT is not set)
+	 * - Oneshot irq which did not wake the thread (caused by a
+	 *   spurious interrupt or a primary handler handling it
+	 *   completely).
+	 */
+	if (!irqd_irq_disabled(&desc->irq_data) &&
+	    irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot)
+		unmask_irq(desc);
+}
+
 /**
  * handle_level_irq - Level type irq handler
  * @irq: the interrupt number
@@ -362,8 +383,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	handle_irq_event(desc);
 
-	if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
-		unmask_irq(desc);
+	cond_unmask_irq(desc);
+
 out_unlock:
 	raw_spin_unlock(&desc->lock);
 }
@@ -417,6 +438,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		preflow_handler(desc);
 	handle_irq_event(desc);
 
+	if (desc->istate & IRQS_ONESHOT)
+		cond_unmask_irq(desc);
+
 out_eoi:
 	desc->irq_data.chip->irq_eoi(&desc->irq_data);
 out_unlock:
@@ -625,7 +649,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
-		irq_startup(desc);
+		irq_startup(desc, true);
 	}
 out:
 	irq_put_desc_busunlock(desc, flags);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b7952316016a..40378ff877e7 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -67,7 +67,7 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
-extern int irq_startup(struct irq_desc *desc);
+extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
 extern void irq_enable(struct irq_desc *desc);
 extern void irq_disable(struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a9a9dbe49fea..0f0d4704ddd8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -985,6 +985,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	/* add new interrupt at end of irq queue */
 	do {
+		/*
+		 * Or all existing action->thread_mask bits,
+		 * so we can find the next zero bit for this
+		 * new action.
+		 */
 		thread_mask |= old->thread_mask;
 		old_ptr = &old->next;
 		old = *old_ptr;
@@ -993,14 +998,41 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
-	 * Setup the thread mask for this irqaction. Unlikely to have
-	 * 32 resp 64 irqs sharing one line, but who knows.
+	 * Setup the thread mask for this irqaction for ONESHOT. For
+	 * !ONESHOT irqs the thread mask is 0 so we can avoid a
+	 * conditional in irq_wake_thread().
 	 */
-	if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
-		ret = -EBUSY;
-		goto out_mask;
+	if (new->flags & IRQF_ONESHOT) {
+		/*
+		 * Unlikely to have 32 resp 64 irqs sharing one line,
+		 * but who knows.
+		 */
+		if (thread_mask == ~0UL) {
+			ret = -EBUSY;
+			goto out_mask;
+		}
+		/*
+		 * The thread_mask for the action is or'ed to
+		 * desc->thread_active to indicate that the
+		 * IRQF_ONESHOT thread handler has been woken, but not
+		 * yet finished. The bit is cleared when a thread
+		 * completes. When all threads of a shared interrupt
+		 * line have completed desc->threads_active becomes
+		 * zero and the interrupt line is unmasked. See
+		 * handle.c:irq_wake_thread() for further information.
+		 *
+		 * If no thread is woken by primary (hard irq context)
+		 * interrupt handlers, then desc->threads_active is
+		 * also checked for zero to unmask the irq line in the
+		 * affected hard irq flow handlers
+		 * (handle_[fasteoi|level]_irq).
+		 *
+		 * The new action gets the first zero bit of
+		 * thread_mask assigned. See the loop above which or's
+		 * all existing action->thread_mask bits.
+		 */
+		new->thread_mask = 1 << ffz(thread_mask);
 	}
-	new->thread_mask = 1 << ffz(thread_mask);
 
 	if (!shared) {
 		init_waitqueue_head(&desc->wait_for_threads);
@@ -1027,7 +1059,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		desc->istate |= IRQS_ONESHOT;
 
 		if (irq_settings_can_autoenable(desc))
-			irq_startup(desc);
+			irq_startup(desc, true);
 		else
 			/* Undo nested disables: */
 			desc->depth = 1;
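As background for the ONESHOT thread_mask assignment above, here is a standalone sketch in plain C (illustrative only, not kernel code; ffz_demo is a stand-in for the kernel's ffz()) of the bit allocation being performed: OR together the masks already taken by actions sharing the line, then hand the new action the first zero bit.

#include <stdio.h>

/* first zero bit, like the kernel's ffz() */
static unsigned int ffz_demo(unsigned long mask)
{
	unsigned int bit = 0;

	while (mask & 1UL) {
		mask >>= 1;
		bit++;
	}
	return bit;
}

int main(void)
{
	unsigned long taken[] = { 1UL << 0, 1UL << 1, 1UL << 3 };
	unsigned long thread_mask = 0;
	int i;

	for (i = 0; i < 3; i++)
		thread_mask |= taken[i];	/* 0b1011 */

	/* the new shared action gets bit 2, the first free slot */
	printf("new->thread_mask = 0x%lx\n", 1UL << ffz_demo(thread_mask));
	return 0;
}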
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 95dd7212e610..c62b8546cc90 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1077,6 +1077,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 		/* Early boot. kretprobe_table_locks not yet initialized. */
 		return;
 
+	INIT_HLIST_HEAD(&empty_rp);
 	hash = hash_ptr(tk, KPROBE_HASH_BITS);
 	head = &kretprobe_inst_table[hash];
 	kretprobe_table_lock(hash, &flags);
@@ -1085,7 +1086,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
 	kretprobe_table_unlock(hash, &flags);
-	INIT_HLIST_HEAD(&empty_rp);
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
@@ -1334,8 +1334,10 @@ int __kprobes register_kprobe(struct kprobe *p)
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr) ||
 	    ftrace_text_reserved(p->addr, p->addr) ||
-	    jump_label_text_reserved(p->addr, p->addr))
-		goto fail_with_jump_label;
+	    jump_label_text_reserved(p->addr, p->addr)) {
+		ret = -EINVAL;
+		goto cannot_probe;
+	}
 
 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
 	p->flags &= KPROBE_FLAG_DISABLED;
@@ -1352,7 +1354,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 		 * its code to prohibit unexpected unloading.
 		 */
 		if (unlikely(!try_module_get(probed_mod)))
-			goto fail_with_jump_label;
+			goto cannot_probe;
 
 		/*
 		 * If the module freed .init.text, we couldn't insert
@@ -1361,7 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 		if (within_module_init((unsigned long)p->addr, probed_mod) &&
 		    probed_mod->state != MODULE_STATE_COMING) {
 			module_put(probed_mod);
-			goto fail_with_jump_label;
+			goto cannot_probe;
 		}
 		/* ret will be updated by following code */
 	}
@@ -1409,7 +1411,7 @@ out:
 
 	return ret;
 
-fail_with_jump_label:
+cannot_probe:
 	preempt_enable();
 	jump_label_unlock();
 	return ret;
@@ -1673,8 +1675,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 		ri->rp = rp;
 		ri->task = current;
 
-		if (rp->entry_handler && rp->entry_handler(ri, regs))
+		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+			raw_spin_lock_irqsave(&rp->lock, flags);
+			hlist_add_head(&ri->hlist, &rp->free_instances);
+			raw_spin_unlock_irqrestore(&rp->lock, flags);
 			return 0;
+		}
 
 		arch_prepare_kretprobe(ri, regs);
 
diff --git a/kernel/params.c b/kernel/params.c
index 32ee04308285..4bc965d8a1fe 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -97,7 +97,8 @@ static int parse_one(char *param,
 	for (i = 0; i < num_params; i++) {
 		if (parameq(param, params[i].name)) {
 			/* No one handled NULL, so do it here. */
-			if (!val && params[i].ops->set != param_set_bool)
+			if (!val && params[i].ops->set != param_set_bool
+			    && params[i].ops->set != param_set_bint)
 				return -EINVAL;
 			pr_debug("They are equal! Calling %p\n",
 				 params[i].ops->set);
diff --git a/kernel/pid.c b/kernel/pid.c
index ce8e00deaccb..9f08dfabaf13 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -543,12 +543,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  */
 void __init pidhash_init(void)
 {
-	int i, pidhash_size;
+	unsigned int i, pidhash_size;
 
 	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
 					   HASH_EARLY | HASH_SMALL,
 					   &pidhash_shift, NULL, 4096);
-	pidhash_size = 1 << pidhash_shift;
+	pidhash_size = 1U << pidhash_shift;
 
 	for (i = 0; i < pidhash_size; i++)
 		INIT_HLIST_HEAD(&pid_hash[i]);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 0c4defe6d3b8..21724eee5206 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -231,8 +231,28 @@ extern int pm_test_level;
 #ifdef CONFIG_SUSPEND_FREEZER
 static inline int suspend_freeze_processes(void)
 {
-	int error = freeze_processes();
-	return error ? : freeze_kernel_threads();
+	int error;
+
+	error = freeze_processes();
+
+	/*
+	 * freeze_processes() automatically thaws every task if freezing
+	 * fails. So we need not do anything extra upon error.
+	 */
+	if (error)
+		goto Finish;
+
+	error = freeze_kernel_threads();
+
+	/*
+	 * freeze_kernel_threads() thaws only kernel threads upon freezing
+	 * failure. So we have to thaw the userspace tasks ourselves.
+	 */
+	if (error)
+		thaw_processes();
+
+ Finish:
+	return error;
 }
 
 static inline void suspend_thaw_processes(void)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 77274c9ba2f1..7e426459e60a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -143,7 +143,10 @@ int freeze_processes(void)
 /**
  * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
  *
- * On success, returns 0. On failure, -errno and system is fully thawed.
+ * On success, returns 0. On failure, -errno and only the kernel threads are
+ * thawed, so as to give a chance to the caller to do additional cleanups
+ * (if any) before thawing the userspace tasks. So, it is the responsibility
+ * of the caller to thaw the userspace tasks, when the time is right.
  */
 int freeze_kernel_threads(void)
 {
@@ -159,7 +162,7 @@ int freeze_kernel_threads(void)
 	BUG_ON(in_atomic());
 
 	if (error)
-		thaw_processes();
+		thaw_kernel_threads();
 	return error;
 }
 
@@ -188,3 +191,22 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
+void thaw_kernel_threads(void)
+{
+	struct task_struct *g, *p;
+
+	pm_nosig_freezing = false;
+	printk("Restarting kernel threads ... ");
+
+	thaw_workqueues();
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+			__thaw_task(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	schedule();
+	printk("done.\n");
+}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1cf88900ec4f..6a768e537001 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -812,7 +812,8 @@ unsigned int snapshot_additional_pages(struct zone *zone)
 	unsigned int res;
 
 	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
-	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
+	res += DIV_ROUND_UP(res * sizeof(struct bm_block),
+			    LINKED_PAGE_DATA_SIZE);
 	return 2 * res;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6b1ab7a88522..3e100075b13c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -249,13 +249,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		}
 		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
-		if (!error) {
+		if (error) {
+			thaw_kernel_threads();
+		} else {
 			error = put_user(in_suspend, (int __user *)arg);
 			if (!error && !freezer_test_done)
 				data->ready = 1;
 			if (freezer_test_done) {
 				freezer_test_done = false;
-				thaw_processes();
+				thaw_kernel_threads();
 			}
 		}
 		break;
@@ -274,6 +276,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		swsusp_free();
 		memset(&data->handle, 0, sizeof(struct snapshot_handle));
 		data->ready = 0;
+		/*
+		 * It is necessary to thaw kernel threads here, because
+		 * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+		 * SNAPSHOT_FREE. In that case, if kernel threads were not
+		 * thawed, the preallocation of memory carried out by
+		 * hibernation_snapshot() might run into problems (i.e. it
+		 * might fail or even deadlock).
+		 */
+		thaw_kernel_threads();
 		break;
 
 	case SNAPSHOT_PREF_IMAGE_SIZE:
diff --git a/kernel/printk.c b/kernel/printk.c
index 13c0a1143f49..32690a0b7a18 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -702,6 +702,9 @@ static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
 /* Check if we have any console registered that can be called early in boot. */
 static int have_callable_console(void)
 {
@@ -1732,6 +1735,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 	unsigned long l1, l2;
 	unsigned long flags;
 
+	if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
+		return;
+
 	/* Theoretically, the log could move on after we do this, but
 	   there's not a lot we can do about that. The new messages
 	   will overwrite the start of what we dump. */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 88f17b8a3b1d..a58ac285fc69 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -56,8 +56,8 @@ static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */
 static int nfakewriters = 4;	/* # fake writer threads */
 static int stat_interval;	/* Interval between stats, in seconds. */
 				/*  Defaults to "only at end of test". */
-static int verbose;		/* Print more debug info. */
-static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
+static bool verbose;		/* Print more debug info. */
+static bool test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
 static int stutter = 5;		/* Start/stop testing interval (in sec) */
 static int irqreader = 1;	/* RCU readers from irq (timers). */
@@ -1399,7 +1399,7 @@ rcu_torture_shutdown(void *arg)
  * Execute random CPU-hotplug operations at the interval specified
  * by the onoff_interval.
  */
-static int
+static int __cpuinit
 rcu_torture_onoff(void *arg)
 {
 	int cpu;
@@ -1447,7 +1447,7 @@ rcu_torture_onoff(void *arg)
 	return 0;
 }
 
-static int
+static int __cpuinit
 rcu_torture_onoff_init(void)
 {
 	if (onoff_interval <= 0)
diff --git a/kernel/relay.c b/kernel/relay.c
index 4335e1d7ee2d..ab56a1764d4d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -164,10 +164,14 @@ depopulate:
  */
 static struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
-	struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
-	if (!buf)
+	struct rchan_buf *buf;
+
+	if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
 		return NULL;
 
+	buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
 	buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
 	if (!buf->padding)
 		goto free_buf;
@@ -574,6 +578,8 @@ struct rchan *relay_open(const char *base_filename,
 
 	if (!(subbuf_size && n_subbufs))
 		return NULL;
+	if (subbuf_size > UINT_MAX / n_subbufs)
+		return NULL;
 
 	chan = kzalloc(sizeof(struct rchan), GFP_KERNEL);
 	if (!chan)
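The relay.c hunks above add the classic divide-before-multiply overflow guard. A minimal standalone illustration of that pattern (plain C, illustrative only, not kernel code): reject n * size whenever n > UINT_MAX / size, before the multiplication can wrap.

#include <limits.h>
#include <stdio.h>

static int alloc_would_overflow(unsigned int n_subbufs, unsigned int subbuf_size)
{
	if (subbuf_size && n_subbufs > UINT_MAX / subbuf_size)
		return 1;	/* n_subbufs * subbuf_size would wrap */
	return 0;
}

int main(void)
{
	printf("%d\n", alloc_would_overflow(1U << 16, 1U << 16));	/* 1: wraps */
	printf("%d\n", alloc_would_overflow(8, 4096));			/* 0: fine  */
	return 0;
}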
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 6d269cce7aa1..d508363858b3 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -66,6 +66,31 @@ done:
 	return ret;
 }
 
+int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
+			      struct res_counter **limit_fail_at)
+{
+	int ret, r;
+	unsigned long flags;
+	struct res_counter *c;
+
+	r = ret = 0;
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		r = res_counter_charge_locked(c, val);
+		if (r)
+			c->usage += val;
+		spin_unlock(&c->lock);
+		if (r < 0 && ret == 0) {
+			*limit_fail_at = c;
+			ret = r;
+		}
+	}
+	local_irq_restore(flags);
+
+	return ret;
+}
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index df00cb09263e..b342f57879e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
@@ -723,9 +724,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	p->sched_class->dequeue_task(rq, p, flags);
 }
 
-/*
- * activate_task - move a task to the runqueue.
- */
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
@@ -734,9 +732,6 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 	enqueue_task(rq, p, flags);
 }
 
-/*
- * deactivate_task - remove a task from the runqueue.
- */
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
@@ -1937,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
-	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
@@ -4134,7 +4128,7 @@ recheck:
 	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
-		deactivate_task(rq, p, 0);
+		dequeue_task(rq, p, 0);
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
@@ -4147,7 +4141,7 @@ recheck:
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		activate_task(rq, p, 0);
+		enqueue_task(rq, p, 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	task_rq_unlock(rq, p, &flags);
@@ -4998,9 +4992,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	 * placed properly.
 	 */
 	if (p->on_rq) {
-		deactivate_task(rq_src, p, 0);
+		dequeue_task(rq_src, p, 0);
 		set_task_cpu(p, dest_cpu);
-		activate_task(rq_dest, p, 0);
+		enqueue_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p, 0);
 	}
 done:
@@ -7032,10 +7026,10 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 
 	on_rq = p->on_rq;
 	if (on_rq)
-		deactivate_task(rq, p, 0);
+		dequeue_task(rq, p, 0);
 	__setscheduler(rq, p, SCHED_NORMAL, 0);
 	if (on_rq) {
-		activate_task(rq, p, 0);
+		enqueue_task(rq, p, 0);
 		resched_task(rq->curr);
 	}
 
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index b0d798eaf130..d72586fdf660 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -129,7 +129,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  * cpupri_set - update the cpu priority setting
  * @cp: The cpupri context
  * @cpu: The target cpu
- * @pri: The priority (INVALID-RT99) to assign to this CPU
+ * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
  *
@@ -200,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
- * @bootmem: true if allocations need to use bootmem
  *
  * Returns: -ENOMEM if memory fails.
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 84adb2d66cbd..aca16b843b7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
+		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
+		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -4866,6 +4868,15 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
+static inline void clear_nohz_tick_stopped(int cpu)
+{
+	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
+		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+		atomic_dec(&nohz.nr_cpus);
+		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+	}
+}
+
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
@@ -4904,6 +4915,12 @@ void select_nohz_load_balancer(int stop_tick)
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * If this cpu is going down, then nothing needs to be done.
+	 */
+	if (!cpu_active(cpu))
+		return;
+
 	if (stop_tick) {
 		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
 			return;
@@ -4914,6 +4931,18 @@ void select_nohz_load_balancer(int stop_tick)
 	}
 	return;
 }
+
+static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DYING:
+		clear_nohz_tick_stopped(smp_processor_id());
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -5070,11 +5099,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
-		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_dec(&nohz.nr_cpus);
-	}
+	clear_nohz_tick_stopped(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load
@@ -5590,6 +5615,7 @@ __init void init_sched_fair_class(void)
 
 #ifdef CONFIG_NO_HZ
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3640ebbb466b..f42ae7fb5ec5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1587,6 +1587,11 @@ static int push_rt_task(struct rq *rq)
 	if (!next_task)
 		return 0;
 
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	if (unlikely(task_running(rq, next_task)))
+		return 0;
+#endif
+
 retry:
 	if (unlikely(next_task == rq->curr)) {
 		WARN_ON(1);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1d7bca7f4f52..d117262deba3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -296,7 +296,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (__this_cpu_read(soft_watchdog_warn) == true)
 		return HRTIMER_RESTART;
 
-	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+	printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 		smp_processor_id(), duration,
 		current->comm, task_pid_nr(current));
 	print_modules();