path: root/kernel/trace/ring_buffer.c
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c | 780
1 file changed, 509 insertions, 271 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf44c844..dc4dc70171ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -10,6 +10,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
@@ -22,6 +23,28 @@
 #include "trace.h"
 
 /*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+        int ret;
+
+        ret = trace_seq_printf(s, "# compressed entry header\n");
+        ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
+        ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
+        ret = trace_seq_printf(s, "\tarray : 32 bits\n");
+        ret = trace_seq_printf(s, "\n");
+        ret = trace_seq_printf(s, "\tpadding : type == %d\n",
+                               RINGBUF_TYPE_PADDING);
+        ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+                               RINGBUF_TYPE_TIME_EXTEND);
+        ret = trace_seq_printf(s, "\tdata max type_len == %d\n",
+                               RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+
+        return ret;
+}
+
+/*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
  * associated with the CPU it is currently executing on. A reader may read
@@ -182,7 +205,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT 4U
-#define RB_MAX_SMALL_DATA 28
+#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
         RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +217,28 @@ enum {
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-        return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+        return event->type_len == RINGBUF_TYPE_PADDING
+                        && event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-        return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+        return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-        event->type = RINGBUF_TYPE_PADDING;
+        event->type_len = RINGBUF_TYPE_PADDING;
         event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-        event->type = RINGBUF_TYPE_PADDING;
-        /* time delta must be non zero */
-        if (!event->time_delta)
-                event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
         unsigned length;
 
-        if (event->len)
-                length = event->len * RB_ALIGNMENT;
+        if (event->type_len)
+                length = event->type_len * RB_ALIGNMENT;
         else
                 length = event->array[0];
         return length + RB_EVNT_HDR_SIZE;
@@ -242,12 +248,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-        switch (event->type) {
+        switch (event->type_len) {
         case RINGBUF_TYPE_PADDING:
                 if (rb_null_event(event))
                         /* undefined */
                         return -1;
-                return rb_event_data_length(event);
+                return event->array[0] + RB_EVNT_HDR_SIZE;
 
         case RINGBUF_TYPE_TIME_EXTEND:
                 return RB_LEN_TIME_EXTEND;
@@ -271,7 +277,7 @@ rb_event_length(struct ring_buffer_event *event)
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
         unsigned length = rb_event_length(event);
-        if (event->type != RINGBUF_TYPE_DATA)
+        if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
                 return length;
         length -= RB_EVNT_HDR_SIZE;
         if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +290,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-        BUG_ON(event->type != RINGBUF_TYPE_DATA);
+        BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
         /* If length is in len field, then array[0] has the data */
-        if (event->len)
+        if (event->type_len)
                 return (void *)&event->array[0];
         /* Otherwise length is in array[0] and array[1] has the data */
         return (void *)&event->array[1];
@@ -316,9 +322,10 @@ struct buffer_data_page {
 };
 
 struct buffer_page {
+        struct list_head list;          /* list of buffer pages */
         local_t          write;         /* index for next write */
         unsigned         read;          /* index for next read */
-        struct list_head list;          /* list of free pages */
+        local_t          entries;       /* entries on this page */
         struct buffer_data_page *page;  /* Actual data page */
 };
 
@@ -361,6 +368,34 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+        struct buffer_data_page field;
+        int ret;
+
+        ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+                               "offset:0;\tsize:%u;\n",
+                               (unsigned int)sizeof(field.time_stamp));
+
+        ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+                               "offset:%u;\tsize:%u;\n",
+                               (unsigned int)offsetof(typeof(field), commit),
+                               (unsigned int)sizeof(field.commit));
+
+        ret = trace_seq_printf(s, "\tfield: char data;\t"
+                               "offset:%u;\tsize:%u;\n",
+                               (unsigned int)offsetof(typeof(field), data),
+                               (unsigned int)BUF_PAGE_SIZE);
+
+        return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -375,8 +410,11 @@ struct ring_buffer_per_cpu {
         struct buffer_page      *tail_page;     /* write to tail */
         struct buffer_page      *commit_page;   /* committed pages */
         struct buffer_page      *reader_page;
+        unsigned long           nmi_dropped;
+        unsigned long           commit_overrun;
         unsigned long           overrun;
-        unsigned long           entries;
+        unsigned long           read;
+        local_t                 entries;
         u64                     write_stamp;
         u64                     read_stamp;
         atomic_t                record_disabled;
@@ -389,6 +427,8 @@ struct ring_buffer {
         atomic_t                record_disabled;
         cpumask_var_t           cpumask;
 
+        struct lock_class_key   *reader_lock_key;
+
         struct mutex            mutex;
 
         struct ring_buffer_per_cpu **buffers;
@@ -420,13 +460,18 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+        /* shift to debug/test normalization and TIME_EXTENTS */
+        return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
         u64 time;
 
         preempt_disable_notrace();
-        /* shift to debug/test normalization and TIME_EXTENTS */
-        time = buffer->clock() << DEBUG_SHIFT;
+        time = rb_time_stamp(buffer, cpu);
         preempt_enable_no_resched_notrace();
 
         return time;
@@ -523,6 +568,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
         cpu_buffer->cpu = cpu;
         cpu_buffer->buffer = buffer;
         spin_lock_init(&cpu_buffer->reader_lock);
+        lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
         cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
         INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -593,7 +639,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+                                        struct lock_class_key *key)
 {
         struct ring_buffer *buffer;
         int bsize;
@@ -616,6 +663,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
         buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
         buffer->flags = flags;
         buffer->clock = trace_clock_local;
+        buffer->reader_lock_key = key;
 
         /* need at least two pages */
         if (buffer->pages == 1)
@@ -673,7 +721,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
         kfree(buffer);
         return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -947,31 +995,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
         return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-        struct ring_buffer_event *event;
-        unsigned long head;
-
-        for (head = 0; head < rb_head_size(cpu_buffer);
-             head += rb_event_length(event)) {
-
-                event = __rb_page_index(cpu_buffer->head_page, head);
-                if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-                        return;
-                /* Only count data entries */
-                if (event->type != RINGBUF_TYPE_DATA)
-                        continue;
-                cpu_buffer->overrun++;
-                cpu_buffer->entries--;
-        }
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
                                struct buffer_page **bpage)
 {
@@ -991,7 +1014,7 @@ rb_event_index(struct ring_buffer_event *event)
         return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
              struct ring_buffer_event *event)
 {
@@ -1110,28 +1133,21 @@ static void
 rb_update_event(struct ring_buffer_event *event,
                 unsigned type, unsigned length)
 {
-        event->type = type;
+        event->type_len = type;
 
         switch (type) {
 
         case RINGBUF_TYPE_PADDING:
-                break;
-
         case RINGBUF_TYPE_TIME_EXTEND:
-                event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-                break;
-
         case RINGBUF_TYPE_TIME_STAMP:
-                event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
                 break;
 
-        case RINGBUF_TYPE_DATA:
+        case 0:
                 length -= RB_EVNT_HDR_SIZE;
-                if (length > RB_MAX_SMALL_DATA) {
-                        event->len = 0;
+                if (length > RB_MAX_SMALL_DATA)
                         event->array[0] = length;
-                } else
-                        event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+                else
+                        event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
                 break;
         default:
                 BUG();
@@ -1155,131 +1171,157 @@ static unsigned rb_calculate_event_length(unsigned length)
         return length;
 }
 
+
 static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                  unsigned type, unsigned long length, u64 *ts)
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+             unsigned long length, unsigned long tail,
+             struct buffer_page *commit_page,
+             struct buffer_page *tail_page, u64 *ts)
 {
-        struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-        unsigned long tail, write;
+        struct buffer_page *next_page, *head_page, *reader_page;
         struct ring_buffer *buffer = cpu_buffer->buffer;
         struct ring_buffer_event *event;
-        unsigned long flags;
         bool lock_taken = false;
+        unsigned long flags;
 
-        commit_page = cpu_buffer->commit_page;
-        /* we just need to protect against interrupts */
-        barrier();
-        tail_page = cpu_buffer->tail_page;
-        write = local_add_return(length, &tail_page->write);
-        tail = write - length;
+        next_page = tail_page;
 
-        /* See if we shot pass the end of this buffer page */
-        if (write > BUF_PAGE_SIZE) {
-                struct buffer_page *next_page = tail_page;
+        local_irq_save(flags);
+        /*
+         * Since the write to the buffer is still not
+         * fully lockless, we must be careful with NMIs.
+         * The locks in the writers are taken when a write
+         * crosses to a new page. The locks protect against
+         * races with the readers (this will soon be fixed
+         * with a lockless solution).
+         *
+         * Because we can not protect against NMIs, and we
+         * want to keep traces reentrant, we need to manage
+         * what happens when we are in an NMI.
+         *
+         * NMIs can happen after we take the lock.
+         * If we are in an NMI, only take the lock
+         * if it is not already taken. Otherwise
+         * simply fail.
+         */
+        if (unlikely(in_nmi())) {
+                if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+                        cpu_buffer->nmi_dropped++;
+                        goto out_reset;
+                }
+        } else
+                __raw_spin_lock(&cpu_buffer->lock);
 
-                local_irq_save(flags);
-                /*
-                 * Since the write to the buffer is still not
-                 * fully lockless, we must be careful with NMIs.
-                 * The locks in the writers are taken when a write
-                 * crosses to a new page. The locks protect against
-                 * races with the readers (this will soon be fixed
-                 * with a lockless solution).
-                 *
-                 * Because we can not protect against NMIs, and we
-                 * want to keep traces reentrant, we need to manage
-                 * what happens when we are in an NMI.
-                 *
-                 * NMIs can happen after we take the lock.
-                 * If we are in an NMI, only take the lock
-                 * if it is not already taken. Otherwise
-                 * simply fail.
-                 */
-                if (unlikely(in_nmi())) {
-                        if (!__raw_spin_trylock(&cpu_buffer->lock))
-                                goto out_reset;
-                } else
-                        __raw_spin_lock(&cpu_buffer->lock);
+        lock_taken = true;
 
-                lock_taken = true;
+        rb_inc_page(cpu_buffer, &next_page);
 
-                rb_inc_page(cpu_buffer, &next_page);
+        head_page = cpu_buffer->head_page;
+        reader_page = cpu_buffer->reader_page;
 
-                head_page = cpu_buffer->head_page;
-                reader_page = cpu_buffer->reader_page;
+        /* we grabbed the lock before incrementing */
+        if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+                goto out_reset;
 
-                /* we grabbed the lock before incrementing */
-                if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-                        goto out_reset;
+        /*
+         * If for some reason, we had an interrupt storm that made
+         * it all the way around the buffer, bail, and warn
+         * about it.
+         */
+        if (unlikely(next_page == commit_page)) {
+                cpu_buffer->commit_overrun++;
+                goto out_reset;
+        }
 
-                /*
-                 * If for some reason, we had an interrupt storm that made
-                 * it all the way around the buffer, bail, and warn
-                 * about it.
-                 */
-                if (unlikely(next_page == commit_page)) {
-                        WARN_ON_ONCE(1);
+        if (next_page == head_page) {
+                if (!(buffer->flags & RB_FL_OVERWRITE))
                         goto out_reset;
-                }
 
-                if (next_page == head_page) {
-                        if (!(buffer->flags & RB_FL_OVERWRITE))
-                                goto out_reset;
-
-                        /* tail_page has not moved yet? */
-                        if (tail_page == cpu_buffer->tail_page) {
-                                /* count overflows */
-                                rb_update_overflow(cpu_buffer);
+                /* tail_page has not moved yet? */
+                if (tail_page == cpu_buffer->tail_page) {
+                        /* count overflows */
+                        cpu_buffer->overrun +=
+                                local_read(&head_page->entries);
 
                         rb_inc_page(cpu_buffer, &head_page);
                         cpu_buffer->head_page = head_page;
                         cpu_buffer->head_page->read = 0;
-                        }
                 }
+        }
 
         /*
          * If the tail page is still the same as what we think
         * it is, then it is up to us to update the tail
         * pointer.
         */
         if (tail_page == cpu_buffer->tail_page) {
                 local_set(&next_page->write, 0);
-                        local_set(&next_page->page->commit, 0);
-                        cpu_buffer->tail_page = next_page;
+                local_set(&next_page->entries, 0);
+                local_set(&next_page->page->commit, 0);
+                cpu_buffer->tail_page = next_page;
+
+                /* reread the time stamp */
+                *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+                cpu_buffer->tail_page->page->time_stamp = *ts;
+        }
 
-                        /* reread the time stamp */
-                        *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-                        cpu_buffer->tail_page->page->time_stamp = *ts;
-                }
+        /*
+         * The actual tail page has moved forward.
+         */
+        if (tail < BUF_PAGE_SIZE) {
+                /* Mark the rest of the page with padding */
+                event = __rb_page_index(tail_page, tail);
+                kmemcheck_annotate_bitfield(event, bitfield);
+                rb_event_set_padding(event);
+        }
 
-                /*
-                 * The actual tail page has moved forward.
-                 */
-                if (tail < BUF_PAGE_SIZE) {
-                        /* Mark the rest of the page with padding */
-                        event = __rb_page_index(tail_page, tail);
-                        rb_event_set_padding(event);
-                }
+        /* Set the write back to the previous setting */
+        local_sub(length, &tail_page->write);
 
-                if (tail <= BUF_PAGE_SIZE)
-                        /* Set the write back to the previous setting */
-                        local_set(&tail_page->write, tail);
+        /*
+         * If this was a commit entry that failed,
+         * increment that too
+         */
+        if (tail_page == cpu_buffer->commit_page &&
+            tail == rb_commit_index(cpu_buffer)) {
+                rb_set_commit_to_write(cpu_buffer);
+        }
 
-                /*
-                 * If this was a commit entry that failed,
-                 * increment that too
-                 */
-                if (tail_page == cpu_buffer->commit_page &&
-                    tail == rb_commit_index(cpu_buffer)) {
-                        rb_set_commit_to_write(cpu_buffer);
-                }
+        __raw_spin_unlock(&cpu_buffer->lock);
+        local_irq_restore(flags);
+
+        /* fail and let the caller try again */
+        return ERR_PTR(-EAGAIN);
+
+ out_reset:
+        /* reset write */
+        local_sub(length, &tail_page->write);
 
+        if (likely(lock_taken))
                 __raw_spin_unlock(&cpu_buffer->lock);
-                local_irq_restore(flags);
+        local_irq_restore(flags);
+        return NULL;
+}
 
-                /* fail and let the caller try again */
-                return ERR_PTR(-EAGAIN);
-        }
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+                  unsigned type, unsigned long length, u64 *ts)
+{
+        struct buffer_page *tail_page, *commit_page;
+        struct ring_buffer_event *event;
+        unsigned long tail, write;
+
+        commit_page = cpu_buffer->commit_page;
+        /* we just need to protect against interrupts */
+        barrier();
+        tail_page = cpu_buffer->tail_page;
+        write = local_add_return(length, &tail_page->write);
+        tail = write - length;
+
+        /* See if we shot pass the end of this buffer page */
+        if (write > BUF_PAGE_SIZE)
+                return rb_move_tail(cpu_buffer, length, tail,
+                                    commit_page, tail_page, ts);
 
         /* We reserved something on the buffer */
 
@@ -1287,8 +1329,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                 return NULL;
 
         event = __rb_page_index(tail_page, tail);
+        kmemcheck_annotate_bitfield(event, bitfield);
         rb_update_event(event, type, length);
 
+        /* The passed in type is zero for DATA */
+        if (likely(!type))
+                local_inc(&tail_page->entries);
+
         /*
          * If this is a commit and the tail is zero, then update
          * this page's time stamp.
@@ -1297,16 +1344,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                 cpu_buffer->commit_page->page->time_stamp = *ts;
 
         return event;
+}
 
- out_reset:
-        /* reset write */
-        if (tail <= BUF_PAGE_SIZE)
-                local_set(&tail_page->write, tail);
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+                  struct ring_buffer_event *event)
+{
+        unsigned long new_index, old_index;
+        struct buffer_page *bpage;
+        unsigned long index;
+        unsigned long addr;
 
-        if (likely(lock_taken))
-                __raw_spin_unlock(&cpu_buffer->lock);
-        local_irq_restore(flags);
-        return NULL;
+        new_index = rb_event_index(event);
+        old_index = new_index + rb_event_length(event);
+        addr = (unsigned long)event;
+        addr &= PAGE_MASK;
+
+        bpage = cpu_buffer->tail_page;
+
+        if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+                /*
+                 * This is on the tail page. It is possible that
+                 * a write could come in and move the tail page
+                 * and write to the next page. That is fine
+                 * because we just shorten what is on this page.
+                 */
+                index = local_cmpxchg(&bpage->write, old_index, new_index);
+                if (index == old_index)
+                        return 1;
+        }
+
+        /* could not discard */
+        return 0;
 }
 
 static int
@@ -1351,16 +1420,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                         event->array[0] = *delta >> TS_SHIFT;
                 } else {
                         cpu_buffer->commit_page->page->time_stamp = *ts;
-                        event->time_delta = 0;
-                        event->array[0] = 0;
+                        /* try to discard, since we do not need this */
+                        if (!rb_try_to_discard(cpu_buffer, event)) {
+                                /* nope, just zero it */
+                                event->time_delta = 0;
+                                event->array[0] = 0;
+                        }
                 }
                 cpu_buffer->write_stamp = *ts;
                 /* let the caller know this was the commit */
                 ret = 1;
         } else {
-                /* Darn, this is just wasted space */
-                event->time_delta = 0;
-                event->array[0] = 0;
+                /* Try to discard the event */
+                if (!rb_try_to_discard(cpu_buffer, event)) {
+                        /* Darn, this is just wasted space */
+                        event->time_delta = 0;
+                        event->array[0] = 0;
+                }
                 ret = 0;
         }
 
@@ -1371,13 +1447,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-                      unsigned type, unsigned long length)
+                      unsigned long length)
 {
         struct ring_buffer_event *event;
-        u64 ts, delta;
+        u64 ts, delta = 0;
         int commit = 0;
         int nr_loops = 0;
 
+        length = rb_calculate_event_length(length);
  again:
         /*
          * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1468,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
                 return NULL;
 
-        ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+        ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
         /*
          * Only the first commit can update the timestamp.
@@ -1401,23 +1478,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
          * also be made. But only the entry that did the actual
          * commit will be something other than zero.
          */
-        if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
+        if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
             rb_page_write(cpu_buffer->tail_page) ==
-            rb_commit_index(cpu_buffer)) {
+            rb_commit_index(cpu_buffer))) {
+                u64 diff;
 
-                delta = ts - cpu_buffer->write_stamp;
+                diff = ts - cpu_buffer->write_stamp;
 
-                /* make sure this delta is calculated here */
+                /* make sure this diff is calculated here */
                 barrier();
 
                 /* Did the write stamp get updated already? */
                 if (unlikely(ts < cpu_buffer->write_stamp))
-                        delta = 0;
+                        goto get_event;
 
-                if (test_time_stamp(delta)) {
+                delta = diff;
+                if (unlikely(test_time_stamp(delta))) {
 
                         commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
                         if (commit == -EBUSY)
                                 return NULL;
 
@@ -1426,12 +1504,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
                         RB_WARN_ON(cpu_buffer, commit < 0);
                 }
-        } else
-                /* Non commits have zero deltas */
-                delta = 0;
+        }
 
-        event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-        if (PTR_ERR(event) == -EAGAIN)
+ get_event:
+        event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+        if (unlikely(PTR_ERR(event) == -EAGAIN))
                 goto again;
 
         if (!event) {
@@ -1448,7 +1525,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
          * If the timestamp was commited, make the commit our entry
          * now so that we will update it when needed.
          */
-        if (commit)
+        if (unlikely(commit))
                 rb_set_commit_event(cpu_buffer, event);
         else if (!rb_is_commit(cpu_buffer, event))
                 delta = 0;
@@ -1458,6 +1535,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         return event;
 }
 
+#define TRACE_RECURSIVE_DEPTH 16
+
+static int trace_recursive_lock(void)
+{
+        current->trace_recursion++;
+
+        if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+                return 0;
+
+        /* Disable all tracing before we do anything else */
+        tracing_off_permanent();
+
+        printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+                    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+                    current->trace_recursion,
+                    hardirq_count() >> HARDIRQ_SHIFT,
+                    softirq_count() >> SOFTIRQ_SHIFT,
+                    in_nmi());
+
+        WARN_ON_ONCE(1);
+        return -1;
+}
+
+static void trace_recursive_unlock(void)
+{
+        WARN_ON_ONCE(!current->trace_recursion);
+
+        current->trace_recursion--;
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1491,6 +1598,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
         /* If we are tracing schedule, we don't want to recurse */
         resched = ftrace_preempt_disable();
 
+        if (trace_recursive_lock())
+                goto out_nocheck;
+
         cpu = raw_smp_processor_id();
 
         if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1611,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
         if (atomic_read(&cpu_buffer->record_disabled))
                 goto out;
 
-        length = rb_calculate_event_length(length);
-        if (length > BUF_PAGE_SIZE)
+        if (length > BUF_MAX_DATA_SIZE)
                 goto out;
 
-        event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+        event = rb_reserve_next_event(cpu_buffer, length);
         if (!event)
                 goto out;
 
@@ -1520,6 +1629,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
         return event;
 
  out:
+        trace_recursive_unlock();
+
+ out_nocheck:
         ftrace_preempt_enable(resched);
         return NULL;
 }
@@ -1528,7 +1640,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
                       struct ring_buffer_event *event)
 {
-        cpu_buffer->entries++;
+        local_inc(&cpu_buffer->entries);
 
         /* Only process further if we own the commit */
         if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1670,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
         rb_commit(cpu_buffer, event);
 
+        trace_recursive_unlock();
+
         /*
          * Only the last preempt count needs to restore preemption.
          */
@@ -1570,6 +1684,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+        /* array[0] holds the actual length for the discarded event */
+        event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+        event->type_len = RINGBUF_TYPE_PADDING;
+        /* time delta must be non zero */
+        if (!event->time_delta)
+                event->time_delta = 1;
+}
+
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+        rb_event_discard(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+                                struct ring_buffer_event *event)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        int cpu;
+
+        /* The event is discarded regardless */
+        rb_event_discard(event);
+
+        /*
+         * This must only be called if the event has not been
+         * committed yet. Thus we can assume that preemption
+         * is still disabled.
+         */
+        RB_WARN_ON(buffer, preemptible());
+
+        cpu = smp_processor_id();
+        cpu_buffer = buffer->buffers[cpu];
+
+        if (!rb_try_to_discard(cpu_buffer, event))
+                goto out;
+
+        /*
+         * The commit is still visible by the reader, so we
+         * must increment entries.
+         */
+        local_inc(&cpu_buffer->entries);
+ out:
+        /*
+         * If a write came in and pushed the tail page
+         * we still need to update the commit pointer
+         * if we were the commit.
+         */
+        if (rb_is_commit(cpu_buffer, event))
+                rb_set_commit_to_write(cpu_buffer);
+
+        trace_recursive_unlock();
+
+        /*
+         * Only the last preempt count needs to restore preemption.
+         */
+        if (preempt_count() == 1)
+                ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+        else
+                preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
@@ -1589,7 +1796,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
         struct ring_buffer_per_cpu *cpu_buffer;
         struct ring_buffer_event *event;
-        unsigned long event_length;
         void *body;
         int ret = -EBUSY;
         int cpu, resched;
@@ -1612,9 +1818,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
         if (atomic_read(&cpu_buffer->record_disabled))
                 goto out;
 
-        event_length = rb_calculate_event_length(length);
-        event = rb_reserve_next_event(cpu_buffer,
-                                      RINGBUF_TYPE_DATA, event_length);
+        if (length > BUF_MAX_DATA_SIZE)
+                goto out;
+
+        event = rb_reserve_next_event(cpu_buffer, length);
         if (!event)
                 goto out;
 
@@ -1728,7 +1935,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
                 return 0;
 
         cpu_buffer = buffer->buffers[cpu];
-        ret = cpu_buffer->entries;
+        ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+                - cpu_buffer->read;
 
         return ret;
 }
@@ -1755,6 +1963,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        unsigned long ret;
+
+        if (!cpumask_test_cpu(cpu, buffer->cpumask))
+                return 0;
+
+        cpu_buffer = buffer->buffers[cpu];
+        ret = cpu_buffer->nmi_dropped;
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        unsigned long ret;
+
+        if (!cpumask_test_cpu(cpu, buffer->cpumask))
+                return 0;
+
+        cpu_buffer = buffer->buffers[cpu];
+        ret = cpu_buffer->commit_overrun;
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -1770,7 +2019,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
         /* if you care about this being correct, lock the buffer */
         for_each_buffer_cpu(buffer, cpu) {
                 cpu_buffer = buffer->buffers[cpu];
-                entries += cpu_buffer->entries;
+                entries += (local_read(&cpu_buffer->entries) -
+                            cpu_buffer->overrun) - cpu_buffer->read;
         }
 
         return entries;
@@ -1862,7 +2112,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
         u64 delta;
 
-        switch (event->type) {
+        switch (event->type_len) {
         case RINGBUF_TYPE_PADDING:
                 return;
 
@@ -1893,7 +2143,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 {
         u64 delta;
 
-        switch (event->type) {
+        switch (event->type_len) {
         case RINGBUF_TYPE_PADDING:
                 return;
 
@@ -1966,6 +2216,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
         cpu_buffer->reader_page->list.prev = reader->list.prev;
 
         local_set(&cpu_buffer->reader_page->write, 0);
+        local_set(&cpu_buffer->reader_page->entries, 0);
         local_set(&cpu_buffer->reader_page->page->commit, 0);
 
         /* Make the reader page now replace the head */
@@ -2008,8 +2259,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
         event = rb_reader_event(cpu_buffer);
 
-        if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
-                cpu_buffer->entries--;
+        if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+                        || rb_discarded_event(event))
+                cpu_buffer->read++;
 
         rb_update_read_stamp(cpu_buffer, event);
 
@@ -2031,8 +2283,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
          * Check if we are at the end of the buffer.
          */
         if (iter->head >= rb_page_size(iter->head_page)) {
-                if (RB_WARN_ON(buffer,
-                               iter->head_page == cpu_buffer->commit_page))
+                /* discarded commits can make the page empty */
+                if (iter->head_page == cpu_buffer->commit_page)
                         return;
                 rb_inc_iter(iter);
                 return;
@@ -2075,12 +2327,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
         /*
          * We repeat when a timestamp is encountered. It is possible
          * to get multiple timestamps from an interrupt entering just
-         * as one timestamp is about to be written. The max times
-         * that this can happen is the number of nested interrupts we
-         * can have.  Nesting 10 deep of interrupts is clearly
-         * an anomaly.
+         * as one timestamp is about to be written, or from discarded
+         * commits. The most that we can have is the number on a single page.
          */
-        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+        if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
                 return NULL;
 
         reader = rb_get_reader_page(cpu_buffer);
@@ -2089,7 +2339,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
         event = rb_reader_event(cpu_buffer);
 
-        switch (event->type) {
+        switch (event->type_len) {
         case RINGBUF_TYPE_PADDING:
                 if (rb_null_event(event))
                         RB_WARN_ON(cpu_buffer, 1);
@@ -2146,14 +2396,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
  again:
         /*
-         * We repeat when a timestamp is encountered. It is possible
-         * to get multiple timestamps from an interrupt entering just
-         * as one timestamp is about to be written. The max times
-         * that this can happen is the number of nested interrupts we
-         * can have.  Nesting 10 deep of interrupts is clearly
-         * an anomaly.
+         * We repeat when a timestamp is encountered.
+         * We can get multiple timestamps by nested interrupts or also
+         * if filtering is on (discarding commits). Since discarding
+         * commits can be frequent we can get a lot of timestamps.
+         * But we limit them by not adding timestamps if they begin
+         * at the start of a page.
          */
-        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+        if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
                 return NULL;
 
         if (rb_per_cpu_empty(cpu_buffer))
@@ -2161,7 +2411,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
         event = rb_iter_head_event(iter);
 
-        switch (event->type) {
+        switch (event->type_len) {
         case RINGBUF_TYPE_PADDING:
                 if (rb_null_event(event)) {
                         rb_inc_iter(iter);
@@ -2220,7 +2470,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
         event = rb_buffer_peek(buffer, cpu, ts);
         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-        if (event && event->type == RINGBUF_TYPE_PADDING) {
+        if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                 cpu_relax();
                 goto again;
         }
@@ -2248,7 +2498,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
         event = rb_iter_peek(iter, ts);
         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-        if (event && event->type == RINGBUF_TYPE_PADDING) {
+        if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                 cpu_relax();
                 goto again;
         }
@@ -2293,7 +2543,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  out:
         preempt_enable();
 
-        if (event && event->type == RINGBUF_TYPE_PADDING) {
+        if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                 cpu_relax();
                 goto again;
         }
@@ -2386,7 +2636,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  out:
         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-        if (event && event->type == RINGBUF_TYPE_PADDING) {
+        if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                 cpu_relax();
                 goto again;
         }
@@ -2411,6 +2661,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
         cpu_buffer->head_page
                 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
         local_set(&cpu_buffer->head_page->write, 0);
+        local_set(&cpu_buffer->head_page->entries, 0);
         local_set(&cpu_buffer->head_page->page->commit, 0);
 
         cpu_buffer->head_page->read = 0;
@@ -2420,11 +2671,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
         INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
         local_set(&cpu_buffer->reader_page->write, 0);
+        local_set(&cpu_buffer->reader_page->entries, 0);
         local_set(&cpu_buffer->reader_page->page->commit, 0);
         cpu_buffer->reader_page->read = 0;
 
+        cpu_buffer->nmi_dropped = 0;
+        cpu_buffer->commit_overrun = 0;
         cpu_buffer->overrun = 0;
-        cpu_buffer->entries = 0;
+        cpu_buffer->read = 0;
+        local_set(&cpu_buffer->entries, 0);
 
         cpu_buffer->write_stamp = 0;
         cpu_buffer->read_stamp = 0;
@@ -2443,6 +2698,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
         if (!cpumask_test_cpu(cpu, buffer->cpumask))
                 return;
 
+        atomic_inc(&cpu_buffer->record_disabled);
+
         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
         __raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2709,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
         __raw_spin_unlock(&cpu_buffer->lock);
 
         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+        atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -2578,28 +2837,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
-static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-                              struct buffer_data_page *bpage,
-                              unsigned int offset)
-{
-        struct ring_buffer_event *event;
-        unsigned long head;
-
-        __raw_spin_lock(&cpu_buffer->lock);
-        for (head = offset; head < local_read(&bpage->commit);
-             head += rb_event_length(event)) {
-
-                event = __rb_data_page_index(bpage, head);
-                if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-                        return;
-                /* Only count data entries */
-                if (event->type != RINGBUF_TYPE_DATA)
-                        continue;
-                cpu_buffer->entries--;
-        }
-        __raw_spin_unlock(&cpu_buffer->lock);
-}
-
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
@@ -2630,6 +2867,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 
         return bpage;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 
 /**
  * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2880,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
 {
         free_page((unsigned long)data);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
 /**
  * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +3007,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                 /* we copied everything to the beginning */
                 read = 0;
         } else {
+                /* update the entry counter */
+                cpu_buffer->read += local_read(&reader->entries);
+
                 /* swap the pages */
                 rb_init_page(bpage);
                 bpage = reader->page;
                 reader->page = *data_page;
                 local_set(&reader->write, 0);
+                local_set(&reader->entries, 0);
                 reader->read = 0;
                 *data_page = bpage;
-
-                /* update the entry counter */
-                rb_remove_entries(cpu_buffer, bpage, read);
         }
         ret = read;
 
@@ -2787,6 +3027,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
  out:
         return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3086,11 @@ static const struct file_operations rb_simple_fops = {
 static __init int rb_init_debugfs(void)
 {
         struct dentry *d_tracer;
-        struct dentry *entry;
 
         d_tracer = tracing_init_dentry();
 
-        entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-                                    &ring_buffer_flags, &rb_simple_fops);
-        if (!entry)
-                pr_warning("Could not create debugfs 'tracing_on' entry\n");
+        trace_create_file("tracing_on", 0644, d_tracer,
+                          &ring_buffer_flags, &rb_simple_fops);
 
         return 0;
 }