author    Jens Axboe <jaxboe@fusionio.com>    2010-11-16 04:09:42 -0500
committer Jens Axboe <jaxboe@fusionio.com>    2010-11-16 04:09:42 -0500
commit    a02056349cdea2252cd2b21643ebf025e83a29f2 (patch)
tree      b7c889d6cbe8e7188d07d99a5c9da858c53a5b6c /kernel/trace
parent    34db1d595ef6f183fbc1e42cda45a3dfa0035258 (diff)
parent    e53beacd23d9cb47590da6a7a7f6d417b941a994 (diff)
Merge branch 'v2.6.37-rc2' into for-2.6.38/core
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |   7
-rw-r--r--  kernel/trace/blktrace.c              |  20
-rw-r--r--  kernel/trace/ftrace.c                | 129
-rw-r--r--  kernel/trace/ring_buffer.c           | 357
-rw-r--r--  kernel/trace/trace.c                 |  10
-rw-r--r--  kernel/trace/trace.h                 |   4
-rw-r--r--  kernel/trace/trace_event_perf.c      |  28
-rw-r--r--  kernel/trace/trace_events.c          |  61
-rw-r--r--  kernel/trace/trace_functions_graph.c | 209
-rw-r--r--  kernel/trace/trace_irqsoff.c         | 152
-rw-r--r--  kernel/trace/trace_kdb.c             |   1
-rw-r--r--  kernel/trace/trace_kprobe.c          |   3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    | 256
-rw-r--r--  kernel/trace/trace_stack.c           |   1
-rw-r--r--  kernel/trace/trace_workqueue.c       |  10
15 files changed, 825 insertions, 423 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c6ea50..e04b8bcdef88 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_C_RECORDMCOUNT
53 bool
54 help
55 C version of recordmcount available?
56
52config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
53 bool 58 bool
54 59
@@ -121,7 +126,7 @@ if FTRACE
121config FUNCTION_TRACER 126config FUNCTION_TRACER
122 bool "Kernel Function Tracer" 127 bool "Kernel Function Tracer"
123 depends on HAVE_FUNCTION_TRACER 128 depends on HAVE_FUNCTION_TRACER
124 select FRAME_POINTER 129 select FRAME_POINTER if (!ARM_UNWIND)
125 select KALLSYMS 130 select KALLSYMS
126 select GENERIC_TRACER 131 select GENERIC_TRACER
127 select CONTEXT_SWITCH_TRACER 132 select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 959f8d6c8cc1..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/smp_lock.h>
27#include <linux/time.h> 26#include <linux/time.h>
28#include <linux/uaccess.h> 27#include <linux/uaccess.h>
29 28
@@ -169,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
169static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
170 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
171 170
172#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
173#define BLK_TC_RAHEAD BLK_TC_AHEAD 171#define BLK_TC_RAHEAD BLK_TC_AHEAD
174 172
175/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
@@ -197,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
197 return; 195 return;
198 196
199 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
200 what |= MASK_TC_BIT(rw, HARDBARRIER);
201 what |= MASK_TC_BIT(rw, SYNC); 198 what |= MASK_TC_BIT(rw, SYNC);
202 what |= MASK_TC_BIT(rw, RAHEAD); 199 what |= MASK_TC_BIT(rw, RAHEAD);
203 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
@@ -326,6 +323,7 @@ static const struct file_operations blk_dropped_fops = {
326 .owner = THIS_MODULE, 323 .owner = THIS_MODULE,
327 .open = blk_dropped_open, 324 .open = blk_dropped_open,
328 .read = blk_dropped_read, 325 .read = blk_dropped_read,
326 .llseek = default_llseek,
329}; 327};
330 328
331static int blk_msg_open(struct inode *inode, struct file *filp) 329static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -365,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
365 .owner = THIS_MODULE, 363 .owner = THIS_MODULE,
366 .open = blk_msg_open, 364 .open = blk_msg_open,
367 .write = blk_msg_write, 365 .write = blk_msg_write,
366 .llseek = noop_llseek,
368}; 367};
369 368
370/* 369/*
@@ -639,7 +638,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
639 if (!q) 638 if (!q)
640 return -ENXIO; 639 return -ENXIO;
641 640
642 lock_kernel();
643 mutex_lock(&bdev->bd_mutex); 641 mutex_lock(&bdev->bd_mutex);
644 642
645 switch (cmd) { 643 switch (cmd) {
@@ -667,7 +665,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
667 } 665 }
668 666
669 mutex_unlock(&bdev->bd_mutex); 667 mutex_unlock(&bdev->bd_mutex);
670 unlock_kernel();
671 return ret; 668 return ret;
672} 669}
673 670
@@ -1652,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1652 struct block_device *bdev; 1649 struct block_device *bdev;
1653 ssize_t ret = -ENXIO; 1650 ssize_t ret = -ENXIO;
1654 1651
1655 lock_kernel();
1656 bdev = bdget(part_devt(p)); 1652 bdev = bdget(part_devt(p));
1657 if (bdev == NULL) 1653 if (bdev == NULL)
1658 goto out_unlock_kernel; 1654 goto out;
1659 1655
1660 q = blk_trace_get_queue(bdev); 1656 q = blk_trace_get_queue(bdev);
1661 if (q == NULL) 1657 if (q == NULL)
@@ -1683,8 +1679,7 @@ out_unlock_bdev:
1683 mutex_unlock(&bdev->bd_mutex); 1679 mutex_unlock(&bdev->bd_mutex);
1684out_bdput: 1680out_bdput:
1685 bdput(bdev); 1681 bdput(bdev);
1686out_unlock_kernel: 1682out:
1687 unlock_kernel();
1688 return ret; 1683 return ret;
1689} 1684}
1690 1685
@@ -1714,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1714 1709
1715 ret = -ENXIO; 1710 ret = -ENXIO;
1716 1711
1717 lock_kernel();
1718 p = dev_to_part(dev); 1712 p = dev_to_part(dev);
1719 bdev = bdget(part_devt(p)); 1713 bdev = bdget(part_devt(p));
1720 if (bdev == NULL) 1714 if (bdev == NULL)
1721 goto out_unlock_kernel; 1715 goto out;
1722 1716
1723 q = blk_trace_get_queue(bdev); 1717 q = blk_trace_get_queue(bdev);
1724 if (q == NULL) 1718 if (q == NULL)
@@ -1753,8 +1747,6 @@ out_unlock_bdev:
1753 mutex_unlock(&bdev->bd_mutex); 1747 mutex_unlock(&bdev->bd_mutex);
1754out_bdput: 1748out_bdput:
1755 bdput(bdev); 1749 bdput(bdev);
1756out_unlock_kernel:
1757 unlock_kernel();
1758out: 1750out:
1759 return ret ? ret : count; 1751 return ret ? ret : count;
1760} 1752}
@@ -1813,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1813 1805
1814 if (rw & REQ_RAHEAD) 1806 if (rw & REQ_RAHEAD)
1815 rwbs[i++] = 'A'; 1807 rwbs[i++] = 'A';
1816 if (rw & REQ_HARDBARRIER)
1817 rwbs[i++] = 'B';
1818 if (rw & REQ_SYNC) 1808 if (rw & REQ_SYNC)
1819 rwbs[i++] = 'S'; 1809 rwbs[i++] = 'S';
1820 if (rw & REQ_META) 1810 if (rw & REQ_META)
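
The blktrace.c hunks above do three things: they drop the big kernel lock (lock_kernel/unlock_kernel and smp_lock.h), drop the REQ_HARDBARRIER/BLK_TC_HARDBARRIER handling, and name an .llseek handler explicitly in blk_dropped_fops and blk_msg_fops instead of relying on the old implicit default. A minimal sketch of that last pattern, using a hypothetical debugfs attribute rather than anything from this patch:

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

static ssize_t demo_read(struct file *filp, char __user *ubuf,
                         size_t count, loff_t *ppos)
{
        static const char msg[] = "demo\n";

        return simple_read_from_buffer(ubuf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct file_operations demo_fops = {
        .owner  = THIS_MODULE,
        .read   = demo_read,
        .llseek = default_llseek,       /* stated explicitly, as blk_dropped_fops now does */
};

static struct dentry *demo_dentry;

static int __init demo_init(void)
{
        demo_dentry = debugfs_create_file("blktrace_demo", 0444, NULL, NULL, &demo_fops);
        return demo_dentry ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
        debugfs_remove(demo_dentry);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Files where seeking is meaningless take noop_llseek instead, which is what blk_msg_fops picks up above.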
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -800,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
800 .open = tracing_open_generic, 800 .open = tracing_open_generic,
801 .read = ftrace_profile_read, 801 .read = ftrace_profile_read,
802 .write = ftrace_profile_write, 802 .write = ftrace_profile_write,
803 .llseek = default_llseek,
803}; 804};
804 805
805/* used to initialize the real stat files */ 806/* used to initialize the real stat files */
@@ -884,10 +885,8 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 885 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 886 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 887 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 888 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 889 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5),
890 FTRACE_STOP_FUNC_RET = (1 << 6),
891}; 890};
892 891
893static int ftrace_filtered; 892static int ftrace_filtered;
@@ -1226,8 +1225,6 @@ static void ftrace_shutdown(int command)
1226 1225
1227static void ftrace_startup_sysctl(void) 1226static void ftrace_startup_sysctl(void)
1228{ 1227{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1228 if (unlikely(ftrace_disabled))
1232 return; 1229 return;
1233 1230
@@ -1235,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1232 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1233 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1234 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1235 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1236}
1242 1237
1243static void ftrace_shutdown_sysctl(void) 1238static void ftrace_shutdown_sysctl(void)
1244{ 1239{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1240 if (unlikely(ftrace_disabled))
1248 return; 1241 return;
1249 1242
1250 /* ftrace_start_up is true if ftrace is running */ 1243 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1244 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1245 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1246}
1256 1247
1257static cycle_t ftrace_update_time; 1248static cycle_t ftrace_update_time;
@@ -1368,24 +1359,29 @@ enum {
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1359#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1360
1370struct ftrace_iterator { 1361struct ftrace_iterator {
1371 struct ftrace_page *pg; 1362 loff_t pos;
1372 int hidx; 1363 loff_t func_pos;
1373 int idx; 1364 struct ftrace_page *pg;
1374 unsigned flags; 1365 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1366 struct ftrace_func_probe *probe;
1367 struct trace_parser parser;
1368 int hidx;
1369 int idx;
1370 unsigned flags;
1376}; 1371};
1377 1372
1378static void * 1373static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1374t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1375{
1381 struct ftrace_iterator *iter = m->private; 1376 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1377 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1378 struct hlist_head *hhd;
1384 1379
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1380 (*pos)++;
1381 iter->pos = *pos;
1388 1382
1383 if (iter->probe)
1384 hnd = &iter->probe->node;
1389 retry: 1385 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1386 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1387 return NULL;
@@ -1408,7 +1404,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1404 }
1409 } 1405 }
1410 1406
1411 return hnd; 1407 if (WARN_ON_ONCE(!hnd))
1408 return NULL;
1409
1410 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1411
1412 return iter;
1412} 1413}
1413 1414
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1415static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1418,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1418 void *p = NULL;
1418 loff_t l; 1419 loff_t l;
1419 1420
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1421 if (iter->func_pos > *pos)
1421 *pos = 0; 1422 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1423
1425 iter->hidx = 0; 1424 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1425 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1426 p = t_hash_next(m, &l);
1428 if (!p) 1427 if (!p)
1429 break; 1428 break;
1430 } 1429 }
1431 return p; 1430 if (!p)
1431 return NULL;
1432
1433 /* Only set this if we have an item */
1434 iter->flags |= FTRACE_ITER_HASH;
1435
1436 return iter;
1432} 1437}
1433 1438
1434static int t_hash_show(struct seq_file *m, void *v) 1439static int
1440t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1441{
1436 struct ftrace_func_probe *rec; 1442 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1443
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1444 rec = iter->probe;
1445 if (WARN_ON_ONCE(!rec))
1446 return -EIO;
1440 1447
1441 if (rec->ops->print) 1448 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1449 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1457,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1457 struct dyn_ftrace *rec = NULL; 1464 struct dyn_ftrace *rec = NULL;
1458 1465
1459 if (iter->flags & FTRACE_ITER_HASH) 1466 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1467 return t_hash_next(m, pos);
1461 1468
1462 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos;
1463 1471
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1473 return t_hash_start(m, pos);
1466 1474
1467 retry: 1475 retry:
1468 if (iter->idx >= iter->pg->index) { 1476 if (iter->idx >= iter->pg->index) {
@@ -1491,7 +1499,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1491 } 1499 }
1492 } 1500 }
1493 1501
1494 return rec; 1502 if (!rec)
1503 return t_hash_start(m, pos);
1504
1505 iter->func_pos = *pos;
1506 iter->func = rec;
1507
1508 return iter;
1509}
1510
1511static void reset_iter_read(struct ftrace_iterator *iter)
1512{
1513 iter->pos = 0;
1514 iter->func_pos = 0;
1515 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1495} 1516}
1496 1517
1497static void *t_start(struct seq_file *m, loff_t *pos) 1518static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1502,6 +1523,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1502 1523
1503 mutex_lock(&ftrace_lock); 1524 mutex_lock(&ftrace_lock);
1504 /* 1525 /*
1526 * If an lseek was done, then reset and start from beginning.
1527 */
1528 if (*pos < iter->pos)
1529 reset_iter_read(iter);
1530
1531 /*
1505 * For set_ftrace_filter reading, if we have the filter 1532 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 1533 * off, we can short cut and just print out that all
1507 * functions are enabled. 1534 * functions are enabled.
@@ -1518,6 +1545,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 1545 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 1546 return t_hash_start(m, pos);
1520 1547
1548 /*
1549 * Unfortunately, we need to restart at ftrace_pages_start
1550 * every time we let go of the ftrace_mutex. This is because
1551 * those pointers can change without the lock.
1552 */
1521 iter->pg = ftrace_pages_start; 1553 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 1554 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 1555 for (l = 0; l <= *pos; ) {
@@ -1526,10 +1558,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 1558 break;
1527 } 1559 }
1528 1560
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 1561 if (!p) {
1530 return t_hash_start(m, pos); 1562 if (iter->flags & FTRACE_ITER_FILTER)
1563 return t_hash_start(m, pos);
1564
1565 return NULL;
1566 }
1531 1567
1532 return p; 1568 return iter;
1533} 1569}
1534 1570
1535static void t_stop(struct seq_file *m, void *p) 1571static void t_stop(struct seq_file *m, void *p)
@@ -1540,16 +1576,18 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 1576static int t_show(struct seq_file *m, void *v)
1541{ 1577{
1542 struct ftrace_iterator *iter = m->private; 1578 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 1579 struct dyn_ftrace *rec;
1544 1580
1545 if (iter->flags & FTRACE_ITER_HASH) 1581 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 1582 return t_hash_show(m, iter);
1547 1583
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 1584 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 1585 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 1586 return 0;
1551 } 1587 }
1552 1588
1589 rec = iter->func;
1590
1553 if (!rec) 1591 if (!rec)
1554 return 0; 1592 return 0;
1555 1593
@@ -1601,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
1601 1639
1602 ret = ftrace_avail_open(inode, file); 1640 ret = ftrace_avail_open(inode, file);
1603 if (!ret) { 1641 if (!ret) {
1604 m = (struct seq_file *)file->private_data; 1642 m = file->private_data;
1605 iter = (struct ftrace_iterator *)m->private; 1643 iter = m->private;
1606 iter->flags = FTRACE_ITER_FAILURES; 1644 iter->flags = FTRACE_ITER_FAILURES;
1607 } 1645 }
1608 1646
@@ -2418,7 +2456,7 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 2456 .open = ftrace_filter_open,
2419 .read = seq_read, 2457 .read = seq_read,
2420 .write = ftrace_filter_write, 2458 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 2459 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 2460 .release = ftrace_filter_release,
2423}; 2461};
2424 2462
@@ -2632,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = {
2632 .read = seq_read, 2670 .read = seq_read,
2633 .write = ftrace_graph_write, 2671 .write = ftrace_graph_write,
2634 .release = ftrace_graph_release, 2672 .release = ftrace_graph_release,
2673 .llseek = seq_lseek,
2635}; 2674};
2636#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2675#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2637 2676
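
The ftrace.c changes above rework the set_ftrace_filter seq_file iterator: the ftrace_iterator now remembers pos and func_pos, t_next()/t_hash_next() return the iterator itself rather than a raw record, and t_start() rewinds via reset_iter_read() whenever *pos has moved backwards. That is what lets ftrace_filter_fops switch from no_llseek to ftrace_regex_lseek. A small userspace sketch of that shape, with hypothetical names and a plain array as the backing store:

#include <stdio.h>
#include <stddef.h>

/* The iterator owns the cursor and the last position it handed out. */
struct demo_iter {
        long            pos;    /* last *pos produced, like iter->pos above */
        size_t          idx;    /* cursor into the backing array */
        const int       *data;
        size_t          len;
};

static void *demo_next(struct demo_iter *iter, long *pos)
{
        (*pos)++;
        iter->pos = *pos;
        if (iter->idx >= iter->len)
                return NULL;
        iter->idx++;
        return iter;            /* hand back the iterator, not the element */
}

static void *demo_start(struct demo_iter *iter, long *pos)
{
        void *p = iter;
        long l;

        /* If a seek went backwards, reset and walk forward again. */
        if (*pos < iter->pos) {
                iter->pos = 0;
                iter->idx = 0;
        }
        for (l = 0; l <= *pos; ) {
                p = demo_next(iter, &l);
                if (!p)
                        break;
        }
        return p;
}

int main(void)
{
        static const int vals[] = { 10, 20, 30 };
        struct demo_iter iter = { .data = vals, .len = 3 };
        long pos = 0;
        void *p;

        for (p = demo_start(&iter, &pos); p; p = demo_next(&iter, &pos))
                printf("%d\n", vals[iter.idx - 1]);
        return 0;
}

The sketch only shows the return-the-iterator and rewind-on-seek shape; the real code additionally tracks func_pos so the walk can continue into the function-probe hash after the last filter entry.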
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
225}; 225};
226 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
227static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
228{ 231{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
249} 252}
250 253
251/* inline for ring buffer fast paths */ 254/*
252static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
254{ 261{
255 switch (event->type_len) { 262 switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 281 return 0;
275} 282}
276 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
277/** 301/**
278 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
280 */ 310 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 312{
283 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 320 return length;
286 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 329static void *
295rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
296{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 336 if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
404/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
405#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
406 443
407/* Max number of timestamps that can fit on a page */
408#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
409
410int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
411{ 445{
412 struct buffer_data_page field; 446 struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1580 iter->head = 0;
1547} 1581}
1548 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1549/** 1602/**
1550 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1551 * @event: the even to update 1604 * @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1611 * data field.
1559 */ 1612 */
1560static void 1613static void
1561rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1563{ 1617{
1564 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1565 1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1620 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1621
1573 case 0: 1622 /*
1574 length -= RB_EVNT_HDR_SIZE; 1623 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1624 * add it to the start of the resevered space.
1576 event->array[0] = length; 1625 */
1577 else 1626 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1627 event = rb_add_time_stamp(event, delta);
1579 break; 1628 length -= RB_LEN_TIME_EXTEND;
1580 default: 1629 delta = 0;
1581 BUG();
1582 } 1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1639}
1584 1640
1585/* 1641/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1823 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1824} 1880}
1825 1881
1826static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1827rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1828 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1829 struct buffer_page *tail_page, u64 *ts) 1888 struct buffer_page *tail_page, u64 ts)
1830{ 1889{
1831 struct buffer_page *commit_page = cpu_buffer->commit_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1832 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1909 * Nested commits always have zero deltas, so 1968 * Nested commits always have zero deltas, so
1910 * just reread the time stamp 1969 * just reread the time stamp
1911 */ 1970 */
1912 *ts = rb_time_stamp(buffer); 1971 ts = rb_time_stamp(buffer);
1913 next_page->page->time_stamp = *ts; 1972 next_page->page->time_stamp = ts;
1914 } 1973 }
1915 1974
1916 out_again: 1975 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1929 1988
1930static struct ring_buffer_event * 1989static struct ring_buffer_event *
1931__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1932 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1933{ 1993{
1934 struct buffer_page *tail_page; 1994 struct buffer_page *tail_page;
1935 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1936 unsigned long tail, write; 1996 unsigned long tail, write;
1937 1997
1998 /*
1999 * If the time delta since the last event is too big to
2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1938 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1939 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
1940 2008
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1943 tail = write - length; 2011 tail = write - length;
1944 2012
1945 /* See if we shot pass the end of this buffer page */ 2013 /* See if we shot pass the end of this buffer page */
1946 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1947 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1948 tail_page, ts); 2016 tail_page, ts);
1949 2017
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1951 2019
1952 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1953 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1954 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1955 2023
1956 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1957 if (likely(!type))
1958 local_inc(&tail_page->entries);
1959 2025
1960 /* 2026 /*
1961 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1962 * its timestamp. 2028 * its timestamp.
1963 */ 2029 */
1964 if (!tail) 2030 if (!tail)
1965 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1966 2032
1967 return event; 2033 return event;
1968} 2034}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1977 unsigned long addr; 2043 unsigned long addr;
1978 2044
1979 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1980 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1981 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1982 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1983 2049
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2003 return 0; 2069 return 0;
2004} 2070}
2005 2071
2006static int
2007rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2008 u64 *ts, u64 *delta)
2009{
2010 struct ring_buffer_event *event;
2011 int ret;
2012
2013 WARN_ONCE(*delta > (1ULL << 59),
2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2015 (unsigned long long)*delta,
2016 (unsigned long long)*ts,
2017 (unsigned long long)cpu_buffer->write_stamp);
2018
2019 /*
2020 * The delta is too big, we to add a
2021 * new timestamp.
2022 */
2023 event = __rb_reserve_next(cpu_buffer,
2024 RINGBUF_TYPE_TIME_EXTEND,
2025 RB_LEN_TIME_EXTEND,
2026 ts);
2027 if (!event)
2028 return -EBUSY;
2029
2030 if (PTR_ERR(event) == -EAGAIN)
2031 return -EAGAIN;
2032
2033 /* Only a commited time event can update the write stamp */
2034 if (rb_event_is_commit(cpu_buffer, event)) {
2035 /*
2036 * If this is the first on the page, then it was
2037 * updated with the page itself. Try to discard it
2038 * and if we can't just make it zero.
2039 */
2040 if (rb_event_index(event)) {
2041 event->time_delta = *delta & TS_MASK;
2042 event->array[0] = *delta >> TS_SHIFT;
2043 } else {
2044 /* try to discard, since we do not need this */
2045 if (!rb_try_to_discard(cpu_buffer, event)) {
2046 /* nope, just zero it */
2047 event->time_delta = 0;
2048 event->array[0] = 0;
2049 }
2050 }
2051 cpu_buffer->write_stamp = *ts;
2052 /* let the caller know this was the commit */
2053 ret = 1;
2054 } else {
2055 /* Try to discard the event */
2056 if (!rb_try_to_discard(cpu_buffer, event)) {
2057 /* Darn, this is just wasted space */
2058 event->time_delta = 0;
2059 event->array[0] = 0;
2060 }
2061 ret = 0;
2062 }
2063
2064 *delta = 0;
2065
2066 return ret;
2067}
2068
2069static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2070{ 2073{
2071 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
2072 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
2073} 2076}
2074 2077
2075static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2076{ 2079{
2077 unsigned long commits; 2080 unsigned long commits;
2078 2081
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2110 unsigned long length) 2113 unsigned long length)
2111{ 2114{
2112 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
2113 u64 ts, delta = 0; 2116 u64 ts, delta;
2114 int commit = 0;
2115 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
2116 2120
2117 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
2118 2122
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2133 2137
2134 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
2135 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
2136 /* 2143 /*
2137 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
2138 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2146 goto out_fail; 2153 goto out_fail;
2147 2154
2148 ts = rb_time_stamp(cpu_buffer->buffer); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
2149 2157
2150 /* 2158 /* make sure this diff is calculated here */
2151 * Only the first commit can update the timestamp. 2159 barrier();
2152 * Yes there is a race here. If an interrupt comes in
2153 * just after the conditional and it traces too, then it
2154 * will also check the deltas. More than one timestamp may
2155 * also be made. But only the entry that did the actual
2156 * commit will be something other than zero.
2157 */
2158 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2159 rb_page_write(cpu_buffer->tail_page) ==
2160 rb_commit_index(cpu_buffer))) {
2161 u64 diff;
2162
2163 diff = ts - cpu_buffer->write_stamp;
2164
2165 /* make sure this diff is calculated here */
2166 barrier();
2167
2168 /* Did the write stamp get updated already? */
2169 if (unlikely(ts < cpu_buffer->write_stamp))
2170 goto get_event;
2171 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
2172 delta = diff; 2163 delta = diff;
2173 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
2174 2165 WARN_ONCE(delta > (1ULL << 59),
2175 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2176 if (commit == -EBUSY) 2167 (unsigned long long)delta,
2177 goto out_fail; 2168 (unsigned long long)ts,
2178 2169 (unsigned long long)cpu_buffer->write_stamp);
2179 if (commit == -EAGAIN) 2170 add_timestamp = 1;
2180 goto again;
2181
2182 RB_WARN_ON(cpu_buffer, commit < 0);
2183 } 2171 }
2184 } 2172 }
2185 2173
2186 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
2187 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
2188 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
2189 goto again; 2177 goto again;
2190 2178
2191 if (!event) 2179 if (!event)
2192 goto out_fail; 2180 goto out_fail;
2193 2181
2194 if (!rb_event_is_commit(cpu_buffer, event))
2195 delta = 0;
2196
2197 event->time_delta = delta;
2198
2199 return event; 2182 return event;
2200 2183
2201 out_fail: 2184 out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2207 2190
2208#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
2209 2192
2210static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
2211{ 2195{
2212 current->trace_recursion++;
2213
2214 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2215 return 0;
2216
2217 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
2218 tracing_off_permanent(); 2197 tracing_off_permanent();
2219 2198
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
2225 in_nmi()); 2204 in_nmi());
2226 2205
2227 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
2228 return -1; 2218 return -1;
2229} 2219}
2230 2220
2231static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
2232{ 2222{
2233 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
2234 2224
@@ -2308,12 +2298,28 @@ static void
2308rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2309 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
2310{ 2300{
2301 u64 delta;
2302
2311 /* 2303 /*
2312 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
2313 * time stamp. 2305 * time stamp.
2314 */ 2306 */
2315 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
2316 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2317} 2323}
2318 2324
2319static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2353 2359
2354static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
2355{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
2356 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
2357 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2358 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -2606,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2615}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2616EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2617
2618/*
2619 * The total entries in the ring buffer is the running counter
2620 * of entries entered into the ring buffer, minus the sum of
2621 * the entries read from the ring buffer and the number of
2622 * entries that were overwritten.
2623 */
2624static inline unsigned long
2625rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2626{
2627 return local_read(&cpu_buffer->entries) -
2628 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2629}
2630
2609/** 2631/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2632 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2633 * @buffer: The ring buffer
@@ -2614,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2636unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2637{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2638 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2639
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2640 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2641 return 0;
2621 2642
2622 cpu_buffer = buffer->buffers[cpu]; 2643 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2644
2626 return ret; 2645 return rb_num_of_entries(cpu_buffer);
2627} 2646}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2647EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2648
@@ -2684,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2703 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2704 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2705 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2706 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2707 }
2690 2708
2691 return entries; 2709 return entries;
@@ -3040,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3040 3058
3041 again: 3059 again:
3042 /* 3060 /*
3043 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
3044 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
3045 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
3046 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
3047 */ 3065 */
3048 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3049 return NULL; 3067 return NULL;
3050 3068
3051 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -3121,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3121 return NULL; 3139 return NULL;
3122 3140
3123 /* 3141 /*
3124 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
3125 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
3126 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
3127 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
3128 * But we limit them by not adding timestamps if they begin
3129 * at the start of a page.
3130 */ 3146 */
3131 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3132 return NULL; 3148 return NULL;
3133 3149
3134 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
@@ -3826,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3826 if (len > (commit - read)) 3842 if (len > (commit - read))
3827 len = (commit - read); 3843 len = (commit - read);
3828 3844
3829 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3830 3847
3831 if (len < size) 3848 if (len < size)
3832 goto out_unlock; 3849 goto out_unlock;
@@ -3848,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3848 break; 3865 break;
3849 3866
3850 event = rb_reader_event(cpu_buffer); 3867 event = rb_reader_event(cpu_buffer);
3851 size = rb_event_length(event); 3868 /* Always keep the time extend and data together */
3869 size = rb_event_ts_length(event);
3852 } while (len > size); 3870 } while (len > size);
3853 3871
3854 /* update bpage */ 3872 /* update bpage */
@@ -3965,6 +3983,7 @@ static const struct file_operations rb_simple_fops = {
3965 .open = tracing_open_generic, 3983 .open = tracing_open_generic,
3966 .read = rb_simple_read, 3984 .read = rb_simple_read,
3967 .write = rb_simple_write, 3985 .write = rb_simple_write,
3986 .llseek = default_llseek,
3968}; 3987};
3969 3988
3970 3989
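
The core of the ring_buffer.c rework above is that an oversized timestamp delta is no longer committed as a standalone event: rb_add_time_stamp() now writes a TIME_EXTEND record at the start of the reserved space and the data event follows it, with the delta split between the event header's small time_delta field and array[0]. A standalone sketch of that split and of the reassembly done in rb_update_write_stamp(), assuming the usual TS_SHIFT value of 27:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT        27                      /* assumed value of the kernel's TS_SHIFT */
#define TS_MASK         ((1ULL << TS_SHIFT) - 1)

struct demo_extend {
        uint32_t time_delta;    /* low bits, the header's delta field */
        uint32_t array0;        /* high bits, stored in array[0] */
};

/* Split an oversized delta the way rb_add_time_stamp() does. */
static void demo_encode(struct demo_extend *e, uint64_t delta)
{
        e->time_delta = delta & TS_MASK;
        e->array0 = delta >> TS_SHIFT;
}

/* Reassemble it the way rb_update_write_stamp() does. */
static uint64_t demo_decode(const struct demo_extend *e)
{
        return ((uint64_t)e->array0 << TS_SHIFT) + e->time_delta;
}

int main(void)
{
        struct demo_extend e;
        uint64_t delta = (1ULL << 40) + 12345;  /* far too big for the delta field */

        demo_encode(&e, delta);
        printf("round trip ok: %d\n", demo_decode(&e) == delta);
        return 0;
}

Readers treat the pair as one unit: rb_event_ts_length() always returns the time-extend length plus the data event behind it, which is why the peek loops above can now bound their retries at two instead of RB_TIMESTAMPS_PER_PAGE.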
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2196 2196
2197static int tracing_release(struct inode *inode, struct file *file) 2197static int tracing_release(struct inode *inode, struct file *file)
2198{ 2198{
2199 struct seq_file *m = (struct seq_file *)file->private_data; 2199 struct seq_file *m = file->private_data;
2200 struct trace_iterator *iter; 2200 struct trace_iterator *iter;
2201 int cpu; 2201 int cpu;
2202 2202
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3996{ 3996{
3997 struct dentry *d_percpu = tracing_dentry_percpu(); 3997 struct dentry *d_percpu = tracing_dentry_percpu();
3998 struct dentry *d_cpu; 3998 struct dentry *d_cpu;
3999 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3999 char cpu_dir[30]; /* 30 characters should be more than enough */
4000 char cpu_dir[7];
4001 4000
4002 if (cpu > 999 || cpu < 0) 4001 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4003 return;
4004
4005 sprintf(cpu_dir, "cpu%ld", cpu);
4006 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4002 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4007 if (!d_cpu) { 4003 if (!d_cpu) {
4008 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4004 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
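
The trace.c hunk above replaces a sprintf() into a hand-sized 7-byte buffer (and its cpu > 999 guard) with snprintf() into a 30-byte one. A trivial sketch of the difference, using a deliberately large hypothetical value:

#include <stdio.h>

int main(void)
{
        char cpu_dir[30];       /* "30 characters should be more than enough" */
        long cpu = 1234567;     /* hypothetical; the point is that %ld can be wide */

        snprintf(cpu_dir, sizeof(cpu_dir), "cpu%ld", cpu);      /* bounded, never overflows */
        printf("%s\n", cpu_dir);
        return 0;
}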
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d39b3c5454a5..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr,
343 unsigned long ip, 343 unsigned long ip,
344 unsigned long parent_ip, 344 unsigned long parent_ip,
345 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
346void trace_default_header(struct seq_file *m); 350void trace_default_header(struct seq_file *m);
347void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
348int trace_empty(struct trace_iterator *iter); 352int trace_empty(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 24static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 struct hlist_head *list; 27 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 28 int ret = -ENOMEM;
29 int cpu; 29 int cpu;
30 30
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 42 tp_event->perf_events = list;
43 43
44 if (!total_ref_count) { 44 if (!total_ref_count) {
45 char *buf; 45 char __percpu *buf;
46 int i; 46 int i;
47 47
48 for (i = 0; i < 4; i++) { 48 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 49 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 50 if (!buf)
51 goto fail; 51 goto fail;
52 52
@@ -65,7 +65,7 @@ fail:
65 if (!total_ref_count) { 65 if (!total_ref_count) {
66 int i; 66 int i;
67 67
68 for (i = 0; i < 4; i++) { 68 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 69 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 70 perf_trace_buf[i] = NULL;
71 } 71 }
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 101 return ret;
102} 102}
103 103
104int perf_trace_enable(struct perf_event *p_event) 104int perf_trace_add(struct perf_event *p_event, int flags)
105{ 105{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 106 struct ftrace_event_call *tp_event = p_event->tp_event;
107 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 108 struct hlist_head *list;
108 109
109 list = tp_event->perf_events; 110 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 111 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 112 return -EINVAL;
112 113
113 list = this_cpu_ptr(list); 114 if (!(flags & PERF_EF_START))
115 p_event->hw.state = PERF_HES_STOPPED;
116
117 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 118 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 119
116 return 0; 120 return 0;
117} 121}
118 122
119void perf_trace_disable(struct perf_event *p_event) 123void perf_trace_del(struct perf_event *p_event, int flags)
120{ 124{
121 hlist_del_rcu(&p_event->hlist_entry); 125 hlist_del_rcu(&p_event->hlist_entry);
122} 126}
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 146 tp_event->perf_events = NULL;
143 147
144 if (!--total_ref_count) { 148 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 149 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 150 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 151 perf_trace_buf[i] = NULL;
148 } 152 }
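
trace_event_perf.c above sizes the scratch buffers by PERF_NR_CONTEXTS instead of a bare 4 and annotates the per-cpu pointers with __percpu so sparse can check them. A minimal kernel-style sketch of the alloc_percpu()/this_cpu_ptr() pattern, using a hypothetical counter rather than the perf hlists; the caller is assumed to run with preemption disabled, as perf_trace_add() does:

#include <linux/percpu.h>
#include <linux/errno.h>

static unsigned long __percpu *demo_hits;

static int demo_alloc(void)
{
        demo_hits = alloc_percpu(unsigned long);        /* one counter per possible CPU */
        if (!demo_hits)
                return -ENOMEM;
        return 0;
}

static void demo_record(void)
{
        unsigned long *hits = this_cpu_ptr(demo_hits);  /* this CPU's slot */

        (*hits)++;
}

static void demo_free(void)
{
        free_percpu(demo_hits);
}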
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..0725eeab1937 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
600 600
601enum { 601enum {
602 FORMAT_HEADER = 1, 602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 603 FORMAT_FIELD_SEPERATOR = 2,
604 FORMAT_PRINTFMT = 3,
604}; 605};
605 606
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 607static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 608{
608 struct ftrace_event_call *call = m->private; 609 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 610 struct ftrace_event_field *field;
610 struct list_head *head; 611 struct list_head *common_head = &ftrace_common_fields;
612 struct list_head *head = trace_get_fields(call);
611 613
612 (*pos)++; 614 (*pos)++;
613 615
614 switch ((unsigned long)v) { 616 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 617 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 618 if (unlikely(list_empty(common_head)))
619 return NULL;
620
621 field = list_entry(common_head->prev,
622 struct ftrace_event_field, link);
623 return field;
617 624
625 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 626 if (unlikely(list_empty(head)))
619 return NULL; 627 return NULL;
620 628
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 634 return NULL;
627 } 635 }
628 636
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 637 field = v;
638 /* 638 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 639 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 640 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 641 return (void *)FORMAT_PRINTFMT;
655 642
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 643 field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 675 seq_printf(m, "format:\n");
689 return 0; 676 return 0;
690 677
678 case FORMAT_FIELD_SEPERATOR:
679 seq_putc(m, '\n');
680 return 0;
681
691 case FORMAT_PRINTFMT: 682 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 683 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 684 call->print_fmt);
694 return 0; 685 return 0;
695 } 686 }
696 687
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 688 field = v;
708 689
709 /* 690 /*
@@ -951,6 +932,7 @@ static const struct file_operations ftrace_enable_fops = {
951 .open = tracing_open_generic, 932 .open = tracing_open_generic,
952 .read = event_enable_read, 933 .read = event_enable_read,
953 .write = event_enable_write, 934 .write = event_enable_write,
935 .llseek = default_llseek,
954}; 936};
955 937
956static const struct file_operations ftrace_event_format_fops = { 938static const struct file_operations ftrace_event_format_fops = {
@@ -963,29 +945,34 @@ static const struct file_operations ftrace_event_format_fops = {
963static const struct file_operations ftrace_event_id_fops = { 945static const struct file_operations ftrace_event_id_fops = {
964 .open = tracing_open_generic, 946 .open = tracing_open_generic,
965 .read = event_id_read, 947 .read = event_id_read,
948 .llseek = default_llseek,
966}; 949};
967 950
968static const struct file_operations ftrace_event_filter_fops = { 951static const struct file_operations ftrace_event_filter_fops = {
969 .open = tracing_open_generic, 952 .open = tracing_open_generic,
970 .read = event_filter_read, 953 .read = event_filter_read,
971 .write = event_filter_write, 954 .write = event_filter_write,
955 .llseek = default_llseek,
972}; 956};
973 957
974static const struct file_operations ftrace_subsystem_filter_fops = { 958static const struct file_operations ftrace_subsystem_filter_fops = {
975 .open = tracing_open_generic, 959 .open = tracing_open_generic,
976 .read = subsystem_filter_read, 960 .read = subsystem_filter_read,
977 .write = subsystem_filter_write, 961 .write = subsystem_filter_write,
962 .llseek = default_llseek,
978}; 963};
979 964
980static const struct file_operations ftrace_system_enable_fops = { 965static const struct file_operations ftrace_system_enable_fops = {
981 .open = tracing_open_generic, 966 .open = tracing_open_generic,
982 .read = system_enable_read, 967 .read = system_enable_read,
983 .write = system_enable_write, 968 .write = system_enable_write,
969 .llseek = default_llseek,
984}; 970};
985 971
986static const struct file_operations ftrace_show_header_fops = { 972static const struct file_operations ftrace_show_header_fops = {
987 .open = tracing_open_generic, 973 .open = tracing_open_generic,
988 .read = show_header, 974 .read = show_header,
975 .llseek = default_llseek,
989}; 976};
990 977
991static struct dentry *event_trace_events_dir(void) 978static struct dentry *event_trace_events_dir(void)
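
The trace_events.c hunks above drop the old trick of tagging the first event field with the pointer's LSB and instead walk the "format" file through sentinel positions (FORMAT_HEADER, FORMAT_FIELD_SEPERATOR, FORMAT_PRINTFMT) that f_show() recognizes by value. A userspace sketch of that sentinel-value pattern with hypothetical names; small constants cast to void * can never collide with real pointers:

#include <stdio.h>

enum { POS_HEADER = 1, POS_SEPARATOR = 2, POS_PRINTFMT = 3 };

static void demo_show(void *v)
{
        switch ((unsigned long)v) {
        case POS_HEADER:
                printf("format:\n");
                return;
        case POS_SEPARATOR:
                printf("\n");
                return;
        case POS_PRINTFMT:
                printf("print fmt: ...\n");
                return;
        default:
                printf("\tfield:%s;\n", (const char *)v);
        }
}

int main(void)
{
        void *walk[] = {
                (void *)(unsigned long)POS_HEADER, "common_pid",
                (void *)(unsigned long)POS_SEPARATOR, "arg1",
                (void *)(unsigned long)POS_PRINTFMT,
        };
        unsigned int i;

        for (i = 0; i < sizeof(walk) / sizeof(walk[0]); i++)
                demo_show(walk[i]);
        return 0;
}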
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
246 return trace_graph_entry(trace); 262 return trace_graph_entry(trace);
247} 263}
248 264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
249void __trace_graph_return(struct trace_array *tr, 293void __trace_graph_return(struct trace_array *tr,
250 struct ftrace_graph_ret *trace, 294 struct ftrace_graph_ret *trace,
251 unsigned long flags, 295 unsigned long flags,
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
649 693
650 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
651 if (len < 7) { 695 if (len < 7) {
652 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
653 nsecs_rem); 697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
654 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
655 if (!ret) 700 if (!ret)
656 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 900 return 0;
856} 901}
857 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
 908 * - we just entered irq code
909 *
 910 * returns 0 if
911 * - funcgraph-interrupts option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-interrupts option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this is returning entry.
987 * Let's not trace it and clear the entry depth, since
988 * we are out of irq code.
989 *
990 * This condition ensures that we 'leave the irq code' once
991 * we are out of the entry depth. Thus protecting us from
992 * the RETURN entry loss.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
858static enum print_line_t 1005static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 1007 struct trace_iterator *iter, u32 flags)
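
check_irq_entry() and check_irq_return() above implement a small per-CPU state machine: depth_irq stays at -1 while outside interrupt code, records the call depth at which execution entered the __irqentry_text section, and every entry or return at or below that depth is swallowed until a return pops back past the recorded depth. A userspace model of that state machine, a sketch only: the address window and the event stream are invented, and a plain global stands in for the per-CPU field.

#include <stdio.h>

#define IRQ_LO 0x1000UL   /* stand-in for __irqentry_text_start */
#define IRQ_HI 0x2000UL   /* stand-in for __irqentry_text_end   */

static int depth_irq = -1;   /* -1: not inside irq code */

/* returns 1 if the entry event should be hidden */
static int check_irq_entry(unsigned long addr, int depth)
{
        if (depth_irq >= 0)
                return 1;                 /* already inside irq code */
        if (addr < IRQ_LO || addr >= IRQ_HI)
                return 0;                 /* ordinary function, print it */
        depth_irq = depth;                /* entering irq code */
        return 1;
}

/* returns 1 if the return event should be hidden */
static int check_irq_return(int depth)
{
        if (depth_irq == -1)
                return 0;                 /* not inside irq code */
        if (depth_irq >= depth) {         /* popping back past the entry */
                depth_irq = -1;
                return 1;
        }
        return 1;                         /* still inside irq code */
}

int main(void)
{
        struct { char type; unsigned long addr; int depth; } ev[] = {
                { 'E', 0x4000, 0 },   /* normal function, printed  */
                { 'E', 0x1234, 1 },   /* irq handler entry, hidden */
                { 'E', 0x4100, 2 },   /* nested inside irq, hidden */
                { 'R', 0,      2 },   /* hidden                    */
                { 'R', 0,      1 },   /* leaves irq code, hidden   */
                { 'R', 0,      0 },   /* back to normal, printed   */
        };
        for (unsigned i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
                int hide = (ev[i].type == 'E')
                        ? check_irq_entry(ev[i].addr, ev[i].depth)
                        : check_irq_return(ev[i].depth);
                printf("%c depth=%d %s\n", ev[i].type, ev[i].depth,
                       hide ? "hidden" : "printed");
        }
        return 0;
}
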
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 1012 static enum print_line_t ret;
866 int cpu = iter->cpu; 1013 int cpu = iter->cpu;
867 1014
1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
1017
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
870 1020
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1052 int ret;
903 int i; 1053 int i;
904 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
905 if (data) { 1058 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1059 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1054 1207
1055 1208
1056enum print_line_t 1209enum print_line_t
1057print_graph_function_flags(struct trace_iterator *iter, u32 flags) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1058{ 1211{
1059 struct ftrace_graph_ent_entry *field; 1212 struct ftrace_graph_ent_entry *field;
1060 struct fgraph_data *data = iter->private; 1213 struct fgraph_data *data = iter->private;
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1117static enum print_line_t 1270static enum print_line_t
1118print_graph_function(struct trace_iterator *iter) 1271print_graph_function(struct trace_iterator *iter)
1119{ 1272{
1120 return print_graph_function_flags(iter, tracer_flags.val); 1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1121} 1285}
1122 1286
1123static enum print_line_t 1287static enum print_line_t
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1149 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1150} 1314}
1151 1315
1152void print_graph_headers_flags(struct seq_file *s, u32 flags) 1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1153{ 1317{
1154 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1155 1319
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
1190 print_graph_headers_flags(s, tracer_flags.val); 1354 print_graph_headers_flags(s, tracer_flags.val);
1191} 1355}
1192 1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1193void graph_trace_open(struct trace_iterator *iter) 1374void graph_trace_open(struct trace_iterator *iter)
1194{ 1375{
1195 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1213 *pid = -1; 1396 *pid = -1;
1214 *depth = 0; 1397 *depth = 0;
1215 *ignore = 0; 1398 *ignore = 0;
1399 *depth_irq = -1;
1216 } 1400 }
1217 1401
1218 iter->private = data; 1402 iter->private = data;
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1419 }
1236} 1420}
1237 1421
1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1423{
1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
1428}
1429
1238static struct trace_event_functions graph_functions = { 1430static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1431 .trace = print_graph_function_event,
1240}; 1432};
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
1266#endif 1459#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 73a6b0601f2e..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
87 87
88#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
89/* 89/*
90 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
91 */ 101 */
92static void 102static int func_prolog_dec(struct trace_array *tr,
93irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
94{ 105{
95 struct trace_array *tr = irqsoff_trace;
96 struct trace_array_cpu *data;
97 unsigned long flags;
98 long disabled; 106 long disabled;
99 int cpu; 107 int cpu;
100 108
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
106 */ 114 */
107 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
108 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
109 return; 117 return 0;
110 118
111 local_save_flags(flags); 119 local_save_flags(*flags);
112 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
113 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
114 return; 122 return 0;
115 123
116 data = tr->data[cpu]; 124 *data = tr->data[cpu];
117 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
118 126
119 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
120 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
121 149
122 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
123} 151}
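
func_prolog_dec() above factors out the boilerplate that was previously duplicated in irqsoff_tracer_call(), irqsoff_graph_entry() and irqsoff_graph_return(): check that this CPU is the one being traced, re-check that interrupts really are off, then take a recursion guard by bumping data->disabled and proceed only as its first user. A compact userspace model of just the guard part, a sketch in which a single C11 atomic stands in for the per-CPU trace_array_cpu and the function names are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_long disabled;          /* models data->disabled */

/* returns 1 if the caller may trace; on success the guard stays held */
static int func_prolog_dec(void)
{
        long d = atomic_fetch_add(&disabled, 1) + 1;  /* atomic_inc_return() */

        if (d == 1)
                return 1;                     /* first user: go ahead */

        atomic_fetch_sub(&disabled, 1);       /* nested: back off */
        return 0;
}

static void tracer_call(const char *what)
{
        if (!func_prolog_dec())
                return;
        printf("traced: %s\n", what);
        atomic_fetch_sub(&disabled, 1);       /* drop the guard */
}

int main(void)
{
        tracer_call("outer call");            /* traced */
        /* a re-entrant call while the guard is held is dropped: */
        atomic_fetch_add(&disabled, 1);
        tracer_call("nested call");           /* ignored, disabled != 1 */
        atomic_fetch_sub(&disabled, 1);
        tracer_call("later call");            /* traced again */
        return 0;
}
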
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
155 struct trace_array *tr = irqsoff_trace; 183 struct trace_array *tr = irqsoff_trace;
156 struct trace_array_cpu *data; 184 struct trace_array_cpu *data;
157 unsigned long flags; 185 unsigned long flags;
158 long disabled;
159 int ret; 186 int ret;
160 int cpu;
161 int pc; 187 int pc;
162 188
163 cpu = raw_smp_processor_id(); 189 if (!func_prolog_dec(tr, &data, &flags))
164 if (likely(!per_cpu(tracing_cpu, cpu)))
165 return 0; 190 return 0;
166 191
167 local_save_flags(flags); 192 pc = preempt_count();
168 /* slight chance to get a false positive on tracing_cpu */ 193 ret = __trace_graph_entry(tr, trace, flags, pc);
169 if (!irqs_disabled_flags(flags))
170 return 0;
171
172 data = tr->data[cpu];
173 disabled = atomic_inc_return(&data->disabled);
174
175 if (likely(disabled == 1)) {
176 pc = preempt_count();
177 ret = __trace_graph_entry(tr, trace, flags, pc);
178 } else
179 ret = 0;
180
181 atomic_dec(&data->disabled); 194 atomic_dec(&data->disabled);
195
182 return ret; 196 return ret;
183} 197}
184 198
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
187 struct trace_array *tr = irqsoff_trace; 201 struct trace_array *tr = irqsoff_trace;
188 struct trace_array_cpu *data; 202 struct trace_array_cpu *data;
189 unsigned long flags; 203 unsigned long flags;
190 long disabled;
191 int cpu;
192 int pc; 204 int pc;
193 205
194 cpu = raw_smp_processor_id(); 206 if (!func_prolog_dec(tr, &data, &flags))
195 if (likely(!per_cpu(tracing_cpu, cpu)))
196 return; 207 return;
197 208
198 local_save_flags(flags); 209 pc = preempt_count();
199 /* slight chance to get a false positive on tracing_cpu */ 210 __trace_graph_return(tr, trace, flags, pc);
200 if (!irqs_disabled_flags(flags))
201 return;
202
203 data = tr->data[cpu];
204 disabled = atomic_inc_return(&data->disabled);
205
206 if (likely(disabled == 1)) {
207 pc = preempt_count();
208 __trace_graph_return(tr, trace, flags, pc);
209 }
210
211 atomic_dec(&data->disabled); 211 atomic_dec(&data->disabled);
212} 212}
213 213
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
229 229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{ 231{
232 u32 flags = GRAPH_TRACER_FLAGS;
233
234 if (trace_flags & TRACE_ITER_LATENCY_FMT)
235 flags |= TRACE_GRAPH_PRINT_DURATION;
236 else
237 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
238
239 /* 232 /*
240 * In graph mode call the graph tracer output function, 233 * In graph mode call the graph tracer output function,
241 * otherwise go with the TRACE_FN event handler 234 * otherwise go with the TRACE_FN event handler
242 */ 235 */
243 if (is_graph()) 236 if (is_graph())
244 return print_graph_function_flags(iter, flags); 237 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
245 238
246 return TRACE_TYPE_UNHANDLED; 239 return TRACE_TYPE_UNHANDLED;
247} 240}
248 241
249static void irqsoff_print_header(struct seq_file *s) 242static void irqsoff_print_header(struct seq_file *s)
250{ 243{
251 if (is_graph()) { 244 if (is_graph())
252 struct trace_iterator *iter = s->private; 245 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
253 u32 flags = GRAPH_TRACER_FLAGS; 246 else
254
255 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
256 /* print nothing if the buffers are empty */
257 if (trace_empty(iter))
258 return;
259
260 print_trace_header(s, iter);
261 flags |= TRACE_GRAPH_PRINT_DURATION;
262 } else
263 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
264
265 print_graph_headers_flags(s, flags);
266 } else
267 trace_default_header(s); 247 trace_default_header(s);
268} 248}
269 249
270static void 250static void
271trace_graph_function(struct trace_array *tr,
272 unsigned long ip, unsigned long flags, int pc)
273{
274 u64 time = trace_clock_local();
275 struct ftrace_graph_ent ent = {
276 .func = ip,
277 .depth = 0,
278 };
279 struct ftrace_graph_ret ret = {
280 .func = ip,
281 .depth = 0,
282 .calltime = time,
283 .rettime = time,
284 };
285
286 __trace_graph_entry(tr, &ent, flags, pc);
287 __trace_graph_return(tr, &ret, flags, pc);
288}
289
290static void
291__trace_function(struct trace_array *tr, 251__trace_function(struct trace_array *tr,
292 unsigned long ip, unsigned long parent_ip, 252 unsigned long ip, unsigned long parent_ip,
293 unsigned long flags, int pc) 253 unsigned long flags, int pc)
294{ 254{
295 if (!is_graph()) 255 if (is_graph())
256 trace_graph_function(tr, ip, parent_ip, flags, pc);
257 else
296 trace_function(tr, ip, parent_ip, flags, pc); 258 trace_function(tr, ip, parent_ip, flags, pc);
297 else {
298 trace_graph_function(tr, parent_ip, flags, pc);
299 trace_graph_function(tr, ip, flags, pc);
300 }
301} 259}
302 260
303#else 261#else
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 7b8ecd751d93..3c5c5dfea0b3 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -13,7 +13,6 @@
13#include <linux/kdb.h> 13#include <linux/kdb.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15 15
16#include "../debug/kdb/kdb_private.h"
17#include "trace.h" 16#include "trace.h"
18#include "trace_output.h" 17#include "trace_output.h"
19 18
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 544301d29dee..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -31,7 +31,6 @@
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h> 33#include <linux/limits.h>
34#include <linux/uaccess.h>
35#include <asm/bitsperlong.h> 34#include <asm/bitsperlong.h>
36 35
37#include "trace.h" 36#include "trace.h"
@@ -648,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp)
648 } 647 }
649 ret = register_probe_event(tp); 648 ret = register_probe_event(tp);
650 if (ret) { 649 if (ret) {
651 pr_warning("Faild to register probe event(%d)\n", ret); 650 pr_warning("Failed to register probe event(%d)\n", ret);
652 goto end; 651 goto end;
653 } 652 }
654 653
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4086eae6e81b..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,48 +31,98 @@ static int wakeup_rt;
31static arch_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void wakeup_reset(struct trace_array *tr);
34static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
35 38
36static int save_lat_flag; 39static int save_lat_flag;
37 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
38#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
39/* 60/*
40 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
63 * Returns 1 if it is OK to continue, and preemption
64 * is disabled and data->disabled is incremented.
65 * 0 if the trace is to be ignored, and preemption
66 * is not disabled and data->disabled is
67 * kept the same.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
41 */ 73 */
42static void 74static int
43wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
44{ 78{
45 struct trace_array *tr = wakeup_trace;
46 struct trace_array_cpu *data;
47 unsigned long flags;
48 long disabled; 79 long disabled;
49 int cpu; 80 int cpu;
50 int pc;
51 81
52 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
53 return; 83 return 0;
54 84
55 pc = preempt_count(); 85 *pc = preempt_count();
56 preempt_disable_notrace(); 86 preempt_disable_notrace();
57 87
58 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
59 if (cpu != wakeup_current_cpu) 89 if (cpu != wakeup_current_cpu)
60 goto out_enable; 90 goto out_enable;
61 91
62 data = tr->data[cpu]; 92 *data = tr->data[cpu];
63 disabled = atomic_inc_return(&data->disabled); 93 disabled = atomic_inc_return(&(*data)->disabled);
64 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
65 goto out; 95 goto out;
66 96
67 local_irq_save(flags); 97 return 1;
68 98
69 trace_function(tr, ip, parent_ip, flags, pc); 99out:
100 atomic_dec(&(*data)->disabled);
101
102out_enable:
103 preempt_enable_notrace();
104 return 0;
105}
70 106
107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
120
121 local_irq_save(flags);
122 trace_function(tr, ip, parent_ip, flags, pc);
71 local_irq_restore(flags); 123 local_irq_restore(flags);
72 124
73 out:
74 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
75 out_enable:
76 preempt_enable_notrace(); 126 preempt_enable_notrace();
77} 127}
78 128
@@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
82}; 132};
83#endif /* CONFIG_FUNCTION_TRACER */ 133#endif /* CONFIG_FUNCTION_TRACER */
84 134
135static int start_func_tracer(int graph)
136{
137 int ret;
138
139 if (!graph)
140 ret = register_ftrace_function(&trace_ops);
141 else
142 ret = register_ftrace_graph(&wakeup_graph_return,
143 &wakeup_graph_entry);
144
145 if (!ret && tracing_is_enabled())
146 tracer_enabled = 1;
147 else
148 tracer_enabled = 0;
149
150 return ret;
151}
152
153static void stop_func_tracer(int graph)
154{
155 tracer_enabled = 0;
156
157 if (!graph)
158 unregister_ftrace_function(&trace_ops);
159 else
160 unregister_ftrace_graph();
161}
162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
165{
166
167 if (!(bit & TRACE_DISPLAY_GRAPH))
168 return -EINVAL;
169
170 if (!(is_graph() ^ set))
171 return 0;
172
173 stop_func_tracer(!set);
174
175 wakeup_reset(wakeup_trace);
176 tracing_max_latency = 0;
177
178 return start_func_tracer(set);
179}
180
181static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = wakeup_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int pc, ret = 0;
187
188 if (!func_prolog_preempt_disable(tr, &data, &pc))
189 return 0;
190
191 local_save_flags(flags);
192 ret = __trace_graph_entry(tr, trace, flags, pc);
193 atomic_dec(&data->disabled);
194 preempt_enable_notrace();
195
196 return ret;
197}
198
199static void wakeup_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = wakeup_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_preempt_disable(tr, &data, &pc))
207 return;
208
209 local_save_flags(flags);
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212
213 preempt_enable_notrace();
214 return;
215}
216
217static void wakeup_trace_open(struct trace_iterator *iter)
218{
219 if (is_graph())
220 graph_trace_open(iter);
221}
222
223static void wakeup_trace_close(struct trace_iterator *iter)
224{
225 if (iter->private)
226 graph_trace_close(iter);
227}
228
229#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
230
231static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
232{
233 /*
234 * In graph mode call the graph tracer output function,
235 * otherwise go with the TRACE_FN event handler
236 */
237 if (is_graph())
238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
239
240 return TRACE_TYPE_UNHANDLED;
241}
242
243static void wakeup_print_header(struct seq_file *s)
244{
245 if (is_graph())
246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
247 else
248 trace_default_header(s);
249}
250
251static void
252__trace_function(struct trace_array *tr,
253 unsigned long ip, unsigned long parent_ip,
254 unsigned long flags, int pc)
255{
256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
259 trace_function(tr, ip, parent_ip, flags, pc);
260}
261#else
262#define __trace_function trace_function
263
264static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
280static void wakeup_print_header(struct seq_file *s) { }
281static void wakeup_trace_open(struct trace_iterator *iter) { }
282static void wakeup_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
85/* 285/*
86 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
87 */ 287 */
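
wakeup_set_flag() above turns the new display-graph option into a live switch: the is_graph() ^ set test drops writes that do not change the state, the currently registered callback is torn down with stop_func_tracer(), the max-latency snapshot is reset, and the other flavour is started with start_func_tracer(). A toy userspace model of that toggle logic, a sketch only: the two tracer functions are stubbed with printf() and, unlike the kernel, the flag word is updated inside the handler.

#include <stdio.h>

#define TRACE_DISPLAY_GRAPH 1

static unsigned int tracer_flags_val;          /* models tracer_flags.val */
#define is_graph() (tracer_flags_val & TRACE_DISPLAY_GRAPH)

static int start_func_tracer(int graph)
{
        printf("register %s tracer\n", graph ? "function-graph" : "function");
        return 0;
}

static void stop_func_tracer(int graph)
{
        printf("unregister %s tracer\n", graph ? "function-graph" : "function");
}

static int wakeup_set_flag(unsigned int bit, int set)
{
        if (!(bit & TRACE_DISPLAY_GRAPH))
                return -1;                     /* -EINVAL in the kernel */

        if (!(is_graph() ^ set))
                return 0;                      /* no state change, nothing to do */

        stop_func_tracer(!set);                /* stop the current flavour */
        /* kernel also does: wakeup_reset(); tracing_max_latency = 0; */

        if (set)                               /* in the kernel the tracing core */
                tracer_flags_val |= TRACE_DISPLAY_GRAPH;   /* updates the flag; */
        else                                   /* folded in here for the model  */
                tracer_flags_val &= ~TRACE_DISPLAY_GRAPH;

        return start_func_tracer(set);         /* start the other flavour */
}

int main(void)
{
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 1);   /* function -> graph */
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 1);   /* no-op, already set */
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 0);   /* graph -> function */
        return 0;
}
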
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore,
152 /* The task we are waiting for is waking up */ 352 /* The task we are waiting for is waking up */
153 data = wakeup_trace->data[wakeup_cpu]; 353 data = wakeup_trace->data[wakeup_cpu];
154 354
155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 355 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 356 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
157 357
158 T0 = data->preempt_timestamp; 358 T0 = data->preempt_timestamp;
@@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
252 * is not called by an assembly function (where as schedule is) 452 * is not called by an assembly function (where as schedule is)
253 * it should be safe to use it here. 453 * it should be safe to use it here.
254 */ 454 */
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 455 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 456
257out_locked: 457out_locked:
258 arch_spin_unlock(&wakeup_lock); 458 arch_spin_unlock(&wakeup_lock);
@@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
303 */ 503 */
304 smp_wmb(); 504 smp_wmb();
305 505
306 register_ftrace_function(&trace_ops); 506 if (start_func_tracer(is_graph()))
307 507 printk(KERN_ERR "failed to start wakeup tracer\n");
308 if (tracing_is_enabled())
309 tracer_enabled = 1;
310 else
311 tracer_enabled = 0;
312 508
313 return; 509 return;
314fail_deprobe_wake_new: 510fail_deprobe_wake_new:
@@ -320,7 +516,7 @@ fail_deprobe:
320static void stop_wakeup_tracer(struct trace_array *tr) 516static void stop_wakeup_tracer(struct trace_array *tr)
321{ 517{
322 tracer_enabled = 0; 518 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 519 stop_func_tracer(is_graph());
324 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); 520 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup, NULL); 521 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup, NULL); 522 unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly =
379 .start = wakeup_tracer_start, 575 .start = wakeup_tracer_start,
380 .stop = wakeup_tracer_stop, 576 .stop = wakeup_tracer_stop,
381 .print_max = 1, 577 .print_max = 1,
578 .print_header = wakeup_print_header,
579 .print_line = wakeup_print_line,
580 .flags = &tracer_flags,
581 .set_flag = wakeup_set_flag,
382#ifdef CONFIG_FTRACE_SELFTEST 582#ifdef CONFIG_FTRACE_SELFTEST
383 .selftest = trace_selftest_startup_wakeup, 583 .selftest = trace_selftest_startup_wakeup,
384#endif 584#endif
585 .open = wakeup_trace_open,
586 .close = wakeup_trace_close,
385 .use_max_tr = 1, 587 .use_max_tr = 1,
386}; 588};
387 589
@@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly =
394 .stop = wakeup_tracer_stop, 596 .stop = wakeup_tracer_stop,
395 .wait_pipe = poll_wait_pipe, 597 .wait_pipe = poll_wait_pipe,
396 .print_max = 1, 598 .print_max = 1,
599 .print_header = wakeup_print_header,
600 .print_line = wakeup_print_line,
601 .flags = &tracer_flags,
602 .set_flag = wakeup_set_flag,
397#ifdef CONFIG_FTRACE_SELFTEST 603#ifdef CONFIG_FTRACE_SELFTEST
398 .selftest = trace_selftest_startup_wakeup, 604 .selftest = trace_selftest_startup_wakeup,
399#endif 605#endif
606 .open = wakeup_trace_open,
607 .close = wakeup_trace_close,
400 .use_max_tr = 1, 608 .use_max_tr = 1,
401}; 609};
402 610
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a6b7e0e0f3eb..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = {
195 .open = tracing_open_generic, 195 .open = tracing_open_generic,
196 .read = stack_max_size_read, 196 .read = stack_max_size_read,
197 .write = stack_max_size_write, 197 .write = stack_max_size_write,
198 .llseek = default_llseek,
198}; 199};
199 200
200static void * 201static void *
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
263{ 263{
264 int ret, cpu; 264 int ret, cpu;
265 265
266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); 271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
267 if (ret) 272 if (ret)
268 goto out; 273 goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
279 if (ret) 284 if (ret)
280 goto no_creation; 285 goto no_creation;
281 286
282 for_each_possible_cpu(cpu) {
283 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
284 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
285 }
286
287 return 0; 287 return 0;
288 288
289no_creation: 289no_creation:
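
The final hunk moves the per-CPU spinlock and list initialisation ahead of the tracepoint registrations, so a probe that fires while the later register_trace_*() calls are still in progress can no longer touch an uninitialised workqueue_cpu_stat. The general pattern, initialise shared state fully before publishing the callback that uses it, in a short userspace sketch with invented names:

#include <pthread.h>
#include <stdio.h>

struct cpu_stat {
        pthread_mutex_t lock;
        int inserted;
};

static struct cpu_stat stat_demo;
static void (*probe)(void);                    /* the "registered" callback */

static void probe_insertion(void)
{
        pthread_mutex_lock(&stat_demo.lock);   /* safe only if init ran first */
        stat_demo.inserted++;
        pthread_mutex_unlock(&stat_demo.lock);
}

int main(void)
{
        /* 1. initialise the data the probe will touch ... */
        pthread_mutex_init(&stat_demo.lock, NULL);
        stat_demo.inserted = 0;

        /* 2. ... and only then publish/register the probe */
        probe = probe_insertion;

        probe();                               /* a tracepoint firing */
        printf("insertions seen: %d\n", stat_demo.inserted);
        return 0;
}
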