10 files changed, 137 insertions, 19 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f33afb0407bc..f394d2a42ca3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
                        continue;
                timer = rb_entry(base->first, struct hrtimer, node);
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+                /*
+                 * clock_was_set() has changed base->offset so the
+                 * result might be negative. Fix it up to prevent a
+                 * false positive in clockevents_program_event()
+                 */
+                if (expires.tv64 < 0)
+                        expires.tv64 = 0;
                if (expires.tv64 < cpu_base->expires_next.tv64)
                        cpu_base->expires_next = expires;
        }
@@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer)
 #ifdef CONFIG_HIGH_RES_TIMERS
+static int force_clock_reprogram;
+/*
+ * After 5 retry attempts, we consider that hrtimer_interrupt() is
+ * hanging, which can happen when something slows down the interrupt,
+ * such as tracing. We then force the clock reprogramming for every future
+ * hrtimer interrupt to avoid infinite loops, and make use of the
+ * min_delta_ns threshold, which we overwrite here.
+ * The next tick event will be scheduled at 3 times the time we currently
+ * spend in hrtimer_interrupt(). This is a good compromise: the cpus will
+ * spend 1/4 of their time processing the hrtimer interrupts, which is
+ * enough to keep things running without serious starvation.
+ *
+ * @dev:      clock event device whose min_delta_ns is overwritten
+ * @try_time: elapsed time measured by the caller; its tv64 value is cast
+ *            to unsigned long before being multiplied by 3
+ *
+ * Also sets force_clock_reprogram and logs a KERN_WARNING message with
+ * the new min_delta_ns value.
+ */
+static inline void
+hrtimer_interrupt_hanging(struct clock_event_device *dev,
+                        ktime_t try_time)
+{
+        force_clock_reprogram = 1;
+        dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+        printk(KERN_WARNING "hrtimer: interrupt too slow, "
+                "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+}
 /*
 * High resolution timer interrupt
 * Called with interrupts disabled
@@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
        struct hrtimer_clock_base *base;
        ktime_t expires_next, now;
+        int nr_retries = 0;
        int i;
        BUG_ON(!cpu_base->hres_active);
@@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
        dev->next_event.tv64 = KTIME_MAX;
 retry:
+        /* 5 retries is enough to notice a hang */
+        if (!(++nr_retries % 5))
+                hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
        now = ktime_get();
        expires_next.tv64 = KTIME_MAX;
@@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
        /* Reprogramming necessary ? */
        if (expires_next.tv64 != KTIME_MAX) {
-                if (tick_program_event(expires_next, 0))
+                if (tick_program_event(expires_next, force_clock_reprogram))
                        goto retry;
        }
 }
@@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
                break;
 #ifdef CONFIG_HOTPLUG_CPU
+        case CPU_DYING:
+        case CPU_DYING_FROZEN:
+                clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
        {
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f63c706d25e1..7de11bd64dfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 out_unlock:
        spin_unlock(&desc->lock);
 }
+EXPORT_SYMBOL_GPL(handle_level_irq);
 /**
 *      handle_fasteoi_irq - irq handler for transparent controllers
@@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
        }
        spin_unlock_irqrestore(&desc->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__set_irq_handler);
 void
 set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/signal.c b/kernel/signal.c
index e73759783dc8..b6b36768b758 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
        }
 #endif
        printk("\n");
+        preempt_disable();
        show_regs(regs);
+        preempt_enable();
 }
 static int __init setup_print_fatal_signals(char *str)
diff --git a/kernel/smp.c b/kernel/smp.c
index 5cfa0e5e3e88..bbedbb7efe32 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
 enum {
        CSD_FLAG_WAIT           = 0x01,
        CSD_FLAG_ALLOC          = 0x02,
+        CSD_FLAG_LOCK           = 0x04,
 };
 struct call_function_data {
@@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void)
                        if (data_flags & CSD_FLAG_WAIT) {
                                smp_wmb();
                                data->flags &= ~CSD_FLAG_WAIT;
+                        } else if (data_flags & CSD_FLAG_LOCK) {
+                                smp_wmb();
+                                data->flags &= ~CSD_FLAG_LOCK;
                        } else if (data_flags & CSD_FLAG_ALLOC)
                                kfree(data);
                }
@@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void)
        }
 }
+static DEFINE_PER_CPU(struct call_single_data, csd_data);
 /*
 * smp_call_function_single - Run a function on a specific CPU
 * @func: The function to run. This must be fast and non-blocking.
@@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
                func(info);
                local_irq_restore(flags);
        } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
-                struct call_single_data *data = NULL;
+                struct call_single_data *data;
                if (!wait) {
+                        /*
+                         * We are calling a function on a single CPU
+                         * and we are not going to wait for it to finish.
+                         * We first try to allocate the data, but if we
+                         * fail, we fall back to using per-cpu data to pass
+                         * the information to that CPU. Since all callers
+                         * of this code will use the same data, we must
+                         * synchronize the callers to prevent a new caller
+                         * from corrupting the data before the callee
+                         * can access it.
+                         *
+                         * The CSD_FLAG_LOCK is used to let us know when
+                         * the IPI handler is done with the data.
+                         * The first caller will set it, and the callee
+                         * will clear it. The next caller must wait for
+                         * it to clear before we set it again. This
+                         * will make sure the callee is done with the
+                         * data before a new caller will use it.
+                         */
                        data = kmalloc(sizeof(*data), GFP_ATOMIC);
                        if (data)
                                data->flags = CSD_FLAG_ALLOC;
-                }
+                        else {
-                if (!data) {
+                                data = &per_cpu(csd_data, me);
+                                while (data->flags & CSD_FLAG_LOCK)
+                                        cpu_relax();
+                                data->flags = CSD_FLAG_LOCK;
+                        }
+                } else {
                        data = &d;
                        data->flags = CSD_FLAG_WAIT;
                }
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 63e05d423a09..21a5ca849514 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -274,6 +274,21 @@ out_bc:
 }
 /*
+ * Transfer the do_timer job away from a dying cpu.
+ *
+ * If *cpup equals tick_do_timer_cpu, the job is handed to the first cpu
+ * in cpu_online_mask. When cpumask_first() returns a value that is not
+ * below nr_cpu_ids, tick_do_timer_cpu is set to TICK_DO_TIMER_NONE
+ * instead. Nothing is changed when *cpup is not the current
+ * tick_do_timer_cpu.
+ *
+ * Called with interrupts disabled.
+ */
+static void tick_handover_do_timer(int *cpup)
+{
+        if (*cpup == tick_do_timer_cpu) {
+                int cpu = cpumask_first(cpu_online_mask);
+                tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
+                        TICK_DO_TIMER_NONE;
+        }
+}
+/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a life CPU, when a CPU is dead. So we cannot
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
                clockevents_exchange_device(dev, NULL);
                td->evtdev = NULL;
        }
-        /* Transfer the do_timer job away from this cpu */
-        if (*cpup == tick_do_timer_cpu) {
-                int cpu = cpumask_first(cpu_online_mask);
-                tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
-                        TICK_DO_TIMER_NONE;
-        }
        spin_unlock_irqrestore(&tick_device_lock, flags);
 }
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
                tick_broadcast_oneshot_control(reason);
                break;
+        case CLOCK_EVT_NOTIFY_CPU_DYING:
+                tick_handover_do_timer(dev);
+                break;
        case CLOCK_EVT_NOTIFY_CPU_DEAD:
                tick_shutdown_broadcast_oneshot(dev);
                tick_shutdown_broadcast(dev);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09df..7dcf6e9f2b04 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 #include <linux/debugfs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
@@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
@@ -2043,6 +2045,27 @@ static int start_graph_tracing(void)
        return ret;
 }
+/*
+ * Hibernation protection.
+ * The state of the current task is too unstable during
+ * suspend/restore to disk. We want to protect against that.
+ *
+ * PM notifier callback: pauses graph tracing on PM_HIBERNATION_PREPARE
+ * and unpauses it on PM_POST_HIBERNATION. Any other state is ignored.
+ * The notifier block and the data pointer are not used. Always returns
+ * NOTIFY_DONE.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+                                                        void *unused)
+{
+        switch (state) {
+        case PM_HIBERNATION_PREPARE:
+                pause_graph_tracing();
+                break;
+        case PM_POST_HIBERNATION:
+                unpause_graph_tracing();
+                break;
+        }
+        return NOTIFY_DONE;
+}
 int register_ftrace_graph(trace_func_graph_ret_t retfunc,
                        trace_func_graph_ent_t entryfunc)
 {
@@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
        mutex_lock(&ftrace_sysctl_lock);
+        ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+        register_pm_notifier(&ftrace_suspend_notifier);
        atomic_inc(&ftrace_graph_active);
        ret = start_graph_tracing();
        if (ret) {
@@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void)
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
        ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+        unregister_pm_notifier(&ftrace_suspend_notifier);
        mutex_unlock(&ftrace_sysctl_lock);
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662ef..bd38c5cfd8ad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
        return 0;
 }
-#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
+#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
 /*
 * head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                }
                if (next_page == head_page) {
-                        if (!(buffer->flags & RB_FL_OVERWRITE)) {
+                        if (!(buffer->flags & RB_FL_OVERWRITE))
-                                /* reset write */
-                                if (tail <= BUF_PAGE_SIZE)
-                                        local_set(&tail_page->write, tail);
                                goto out_unlock;
-                        }
                        /* tail_page has not moved yet? */
                        if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
        return event;
 out_unlock:
+        /* reset write */
+        if (tail <= BUF_PAGE_SIZE)
+                local_set(&tail_page->write, tail);
        __raw_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
        return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->overrun = 0;
        cpu_buffer->entries = 0;
+        cpu_buffer->write_stamp = 0;
+        cpu_buffer->read_stamp = 0;
 }
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add95..17bb88d86ac2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
-unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly     tracing_max_latency;
 unsigned long __read_mostly     tracing_thresh;
 /*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
-#define KERN_TRACE              KERN_INFO
+#define KERN_TRACE              KERN_EMERG
 static void
 trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
        dump_ran = 1;
        /* No turning back! */
+        tracing_off();
        ftrace_kill();
        for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8b..62a78d943534 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
 static void __irqsoff_tracer_init(struct trace_array *tr)
 {
+        tracing_max_latency = 0;
        irqsoff_trace = tr;
        /* make sure that the tracer is visible */
        smp_wmb();
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e31..42ae1e77b6b3 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
 static int wakeup_tracer_init(struct trace_array *tr)
 {
+        tracing_max_latency = 0;
        wakeup_trace = tr;
        start_wakeup_tracer(tr);
        return 0;