Diffstat (limited to 'kernel/trace/ring_buffer.c')
 kernel/trace/ring_buffer.c | 281 +++++++++++++++++++++++++++++++-------------
 1 file changed, 189 insertions(+), 92 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..178858492a89 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,13 +4,15 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
+#include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
+#include <linux/hardirq.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>	/* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
@@ -57,7 +59,9 @@ enum {
 	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
 };
 
-static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
+static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
+
+#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
 
 /**
  * tracing_on - enable all tracing buffers
@@ -89,26 +93,34 @@ EXPORT_SYMBOL_GPL(tracing_off);
  * tracing_off_permanent - permanently disable ring buffers
  *
  * This function, once called, will disable all ring buffers
- * permanenty.
+ * permanently.
  */
 void tracing_off_permanent(void)
 {
 	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
 }
 
+/**
+ * tracing_is_on - show state of ring buffers enabled
+ */
+int tracing_is_on(void)
+{
+	return ring_buffer_flags == RB_BUFFERS_ON;
+}
+EXPORT_SYMBOL_GPL(tracing_is_on);
+
 #include "trace.h"
 
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = sched_clock() << DEBUG_SHIFT;
+	time = trace_clock_local() << DEBUG_SHIFT;
 	preempt_enable_no_resched_notrace();
 
 	return time;
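The hunk above does two things: the buffer clock moves from sched_clock() to trace_clock_local(), and tracing_is_on() is added as the read-only counterpart to tracing_on()/tracing_off()/tracing_off_permanent(), simply reporting whether ring_buffer_flags still equals RB_BUFFERS_ON. A minimal caller sketch, not part of the patch (the function name is hypothetical):

	/* Hypothetical caller: bail out early when the ring buffers are disabled. */
	static int my_dump_trace(void)
	{
		if (!tracing_is_on())
			return -EBUSY;	/* nothing will be recorded */
		/* ... write events with ring_buffer_lock_reserve()/unlock_commit() ... */
		return 0;
	}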
@@ -122,9 +134,8 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
 
-#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
-#define RB_ALIGNMENT_SHIFT	2
-#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
+#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
+#define RB_ALIGNMENT		4U
 #define RB_MAX_SMALL_DATA	28
 
 enum {
@@ -133,7 +144,7 @@ enum {
 };
 
 /* inline for ring buffer fast paths */
-static inline unsigned
+static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
 	unsigned length;
@@ -151,7 +162,7 @@ rb_event_length(struct ring_buffer_event *event)
 
 	case RINGBUF_TYPE_DATA:
 		if (event->len)
-			length = event->len << RB_ALIGNMENT_SHIFT;
+			length = event->len * RB_ALIGNMENT;
 		else
 			length = event->array[0];
 		return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +190,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 
 /* inline for ring buffer fast paths */
-static inline void *
+static void *
 rb_event_data(struct ring_buffer_event *event)
 {
 	BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +220,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
-	local_t		 commit;	/* write commited index */
+	local_t		 commit;	/* write committed index */
 	unsigned char	 data[];	/* data of buffer page */
 };
 
@@ -225,14 +236,25 @@ static void rb_init_page(struct buffer_data_page *bpage)
 	local_set(&bpage->commit, 0);
 }
 
+/**
+ * ring_buffer_page_len - the size of data on the page.
+ * @page: The page to read
+ *
+ * Returns the amount of data on the page, including buffer page header.
+ */
+size_t ring_buffer_page_len(void *page)
+{
+	return local_read(&((struct buffer_data_page *)page)->commit)
+		+ BUF_PAGE_HDR_SIZE;
+}
+
 /*
  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  * this issue out.
  */
-static inline void free_buffer_page(struct buffer_page *bpage)
+static void free_buffer_page(struct buffer_page *bpage)
 {
-	if (bpage->page)
-		free_page((unsigned long)bpage->page);
+	free_page((unsigned long)bpage->page);
 	kfree(bpage);
 }
 
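ring_buffer_page_len() lets a consumer of an extracted page compute how many bytes are worth copying: the committed payload plus the BUF_PAGE_HDR_SIZE header. An illustrative sketch, not part of the patch (rpage is a page previously filled by ring_buffer_read_page(); dest and the surrounding consumer are hypothetical):

	/* Hypothetical consumer: copy only the valid bytes of an extracted page. */
	size_t size = ring_buffer_page_len(rpage);	/* header + committed data */

	if (size > PAGE_SIZE)		/* defensive clamp, illustrative only */
		size = PAGE_SIZE;
	memcpy(dest, rpage, size);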
@@ -246,7 +268,7 @@ static inline int test_time_stamp(u64 delta)
 	return 0;
 }
 
-#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
+#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
 /*
  * head_page == tail_page && head == tail then buffer is empty.
@@ -260,7 +282,7 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
-	struct buffer_page		*commit_page;	/* commited pages */
+	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
@@ -273,8 +295,8 @@ struct ring_buffer {
 	unsigned			pages;
 	unsigned			flags;
 	int				cpus;
-	cpumask_var_t			cpumask;
 	atomic_t			record_disabled;
+	cpumask_var_t			cpumask;
 
 	struct mutex			mutex;
 
@@ -303,7 +325,7 @@ struct ring_buffer_iter {
  * check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
- * As a safty measure we check to make sure the data pages have not
+ * As a safety measure we check to make sure the data pages have not
  * been corrupted.
  */
 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -811,7 +833,7 @@ rb_event_index(struct ring_buffer_event *event)
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static inline int
+static int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -825,7 +847,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		rb_commit_index(cpu_buffer) == index;
 }
 
-static inline void
+static void
 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
 		    struct ring_buffer_event *event)
 {
@@ -850,7 +872,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
 	local_set(&cpu_buffer->commit_page->page->commit, index);
 }
 
-static inline void
+static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	/*
@@ -896,7 +918,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page->read = 0;
 }
 
-static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+static void rb_inc_iter(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
@@ -926,7 +948,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
  * and with this, we can determine what to place into the
  * data field.
  */
-static inline void
+static void
 rb_update_event(struct ring_buffer_event *event,
 		unsigned type, unsigned length)
 {
@@ -938,15 +960,11 @@ rb_update_event(struct ring_buffer_event *event,
 		break;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len =
-			(RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
-			>> RB_ALIGNMENT_SHIFT;
+		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
 		break;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len =
-			(RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
-			>> RB_ALIGNMENT_SHIFT;
+		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
 	case RINGBUF_TYPE_DATA:
@@ -955,16 +973,14 @@ rb_update_event(struct ring_buffer_event *event,
 			event->len = 0;
 			event->array[0] = length;
 		} else
-			event->len =
-				(length + (RB_ALIGNMENT-1))
-				>> RB_ALIGNMENT_SHIFT;
+			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
 	}
 }
 
-static inline unsigned rb_calculate_event_length(unsigned length)
+static unsigned rb_calculate_event_length(unsigned length)
 {
 	struct ring_buffer_event event; /* Used only for sizeof array */
 
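The encoding above and the decode in rb_event_length() are inverses up to alignment padding: event->len holds the payload size in RB_ALIGNMENT units, rounded up, and the reader multiplies it back out. A standalone sketch of the round trip, not part of the patch (values illustrative only):

	/* Round-trip of the length encoding, with RB_ALIGNMENT == 4. */
	unsigned length    = 10;			/* requested payload bytes   */
	unsigned len_field = DIV_ROUND_UP(length, 4);	/* stored in event->len: 3   */
	unsigned decoded   = len_field * 4;		/* rb_event_length() sees 12 */
	/* decoded >= length always holds; the difference is alignment padding. */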
@@ -990,6 +1006,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	bool lock_taken = false;
 
 	commit_page = cpu_buffer->commit_page;
 	/* we just need to protect against interrupts */
@@ -1003,7 +1020,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		struct buffer_page *next_page = tail_page;
 
 		local_irq_save(flags);
-		__raw_spin_lock(&cpu_buffer->lock);
+		/*
+		 * Since the write to the buffer is still not
+		 * fully lockless, we must be careful with NMIs.
+		 * The locks in the writers are taken when a write
+		 * crosses to a new page. The locks protect against
+		 * races with the readers (this will soon be fixed
+		 * with a lockless solution).
+		 *
+		 * Because we can not protect against NMIs, and we
+		 * want to keep traces reentrant, we need to manage
+		 * what happens when we are in an NMI.
+		 *
+		 * NMIs can happen after we take the lock.
+		 * If we are in an NMI, only take the lock
+		 * if it is not already taken. Otherwise
+		 * simply fail.
+		 */
+		if (unlikely(in_nmi())) {
+			if (!__raw_spin_trylock(&cpu_buffer->lock))
+				goto out_reset;
+		} else
+			__raw_spin_lock(&cpu_buffer->lock);
+
+		lock_taken = true;
 
 		rb_inc_page(cpu_buffer, &next_page);
 
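The comment block above describes the general pattern the patch adopts: in NMI context only trylock (the lock holder may be the interrupted context), otherwise spin, and remember whether the lock was actually taken so the error path can unlock conditionally. A distilled sketch of that pattern outside the ring buffer, not part of the patch (function and return values hypothetical):

	/* Hypothetical distillation of the NMI-safe locking added above. */
	static int do_locked_work(raw_spinlock_t *lock)
	{
		bool lock_taken = false;

		if (unlikely(in_nmi())) {
			/* Never spin in NMI context: we may have interrupted the holder. */
			if (!__raw_spin_trylock(lock))
				goto out;
		} else
			__raw_spin_lock(lock);
		lock_taken = true;

		/* ... work that must not race with readers ... */
	out:
		if (likely(lock_taken))
			__raw_spin_unlock(lock);
		return lock_taken ? 0 : -EBUSY;
	}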
@@ -1012,7 +1052,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 		/* we grabbed the lock before incrementing */
 		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-			goto out_unlock;
+			goto out_reset;
 
 		/*
 		 * If for some reason, we had an interrupt storm that made
@@ -1021,12 +1061,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 */
 		if (unlikely(next_page == commit_page)) {
 			WARN_ON_ONCE(1);
-			goto out_unlock;
+			goto out_reset;
 		}
 
 		if (next_page == head_page) {
 			if (!(buffer->flags & RB_FL_OVERWRITE))
-				goto out_unlock;
+				goto out_reset;
 
 			/* tail_page has not moved yet? */
 			if (tail_page == cpu_buffer->tail_page) {
@@ -1100,12 +1140,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	return event;
 
- out_unlock:
+ out_reset:
 	/* reset write */
 	if (tail <= BUF_PAGE_SIZE)
 		local_set(&tail_page->write, tail);
 
-	__raw_spin_unlock(&cpu_buffer->lock);
+	if (likely(lock_taken))
+		__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 	return NULL;
 }
@@ -1265,7 +1306,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
  * @length: the length of the data to reserve (excluding event header)
- * @flags: a pointer to save the interrupt flags
  *
  * Returns a reseverd event on the ring buffer to copy directly to.
  * The user of this interface will need to get the body to write into
@@ -1278,9 +1318,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
  * If NULL is returned, then nothing has been allocated or locked.
  */
 struct ring_buffer_event *
-ring_buffer_lock_reserve(struct ring_buffer *buffer,
-			 unsigned long length,
-			 unsigned long *flags)
+ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -1347,15 +1385,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
  * @event: The event pointer to commit.
- * @flags: the interrupt flags received from ring_buffer_lock_reserve.
  *
  * This commits the data to the ring buffer, and releases any locks held.
  *
  * Must be paired with ring_buffer_lock_reserve.
  */
 int ring_buffer_unlock_commit(struct ring_buffer *buffer,
-			      struct ring_buffer_event *event,
-			      unsigned long flags)
+			      struct ring_buffer_event *event)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu = raw_smp_processor_id();
@@ -1438,7 +1474,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_write);
 
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = cpu_buffer->reader_page;
 	struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2313,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	if (buffer_a->pages != buffer_b->pages)
 		return -EINVAL;
 
+	if (ring_buffer_flags != RB_BUFFERS_ON)
+		return -EAGAIN;
+
+	if (atomic_read(&buffer_a->record_disabled))
+		return -EAGAIN;
+
+	if (atomic_read(&buffer_b->record_disabled))
+		return -EAGAIN;
+
 	cpu_buffer_a = buffer_a->buffers[cpu];
 	cpu_buffer_b = buffer_b->buffers[cpu];
 
+	if (atomic_read(&cpu_buffer_a->record_disabled))
+		return -EAGAIN;
+
+	if (atomic_read(&cpu_buffer_b->record_disabled))
+		return -EAGAIN;
+
 	/*
 	 * We can't do a synchronize_sched here because this
 	 * function can be called in atomic context.
@@ -2303,13 +2354,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
 static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-			      struct buffer_data_page *bpage)
+			      struct buffer_data_page *bpage,
+			      unsigned int offset)
 {
 	struct ring_buffer_event *event;
 	unsigned long head;
 
 	__raw_spin_lock(&cpu_buffer->lock);
-	for (head = 0; head < local_read(&bpage->commit);
+	for (head = offset; head < local_read(&bpage->commit);
 	     head += rb_event_length(event)) {
 
 		event = __rb_data_page_index(bpage, head);
@@ -2340,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
  */
 void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 {
-	unsigned long addr;
 	struct buffer_data_page *bpage;
+	unsigned long addr;
 
 	addr = __get_free_page(GFP_KERNEL);
 	if (!addr)
@@ -2349,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 
 	bpage = (void *)addr;
 
+	rb_init_page(bpage);
+
 	return bpage;
 }
 
@@ -2368,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * ring_buffer_read_page - extract a page from the ring buffer
  * @buffer: buffer to extract from
  * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @len: amount to extract
  * @cpu: the cpu of the buffer to extract
  * @full: should the extraction only happen when the page is full.
  *
@@ -2377,12 +2432,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * to swap with a page in the ring buffer.
  *
  * for example:
- *	rpage = ring_buffer_alloc_page(buffer);
+ *	rpage = ring_buffer_alloc_read_page(buffer);
  *	if (!rpage)
  *		return error;
- *	ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
- *	if (ret)
- *		process_page(rpage);
+ *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
+ *	if (ret >= 0)
+ *		process_page(rpage, ret);
  *
  * When @full is set, the function will not return true unless
  * the writer is off the reader page.
@@ -2393,69 +2448,111 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * responsible for that.
  *
  * Returns:
- *  1 if data has been transferred
- *  0 if no data has been transferred.
+ *  >=0 if data has been transferred, returns the offset of consumed data.
+ *  <0 if no data has been transferred.
  */
 int ring_buffer_read_page(struct ring_buffer *buffer,
-			    void **data_page, int cpu, int full)
+			  void **data_page, size_t len, int cpu, int full)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
+	struct buffer_page *reader;
 	unsigned long flags;
-	int ret = 0;
+	unsigned int commit;
+	unsigned int read;
+	u64 save_timestamp;
+	int ret = -1;
+
+	/*
+	 * If len is not big enough to hold the page header, then
+	 * we can not copy anything.
+	 */
+	if (len <= BUF_PAGE_HDR_SIZE)
+		return -1;
+
+	len -= BUF_PAGE_HDR_SIZE;
 
 	if (!data_page)
-		return 0;
+		return -1;
 
 	bpage = *data_page;
 	if (!bpage)
-		return 0;
+		return -1;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
-	/*
-	 * rb_buffer_peek will get the next ring buffer if
-	 * the current reader page is empty.
-	 */
-	event = rb_buffer_peek(buffer, cpu, NULL);
-	if (!event)
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
 		goto out;
 
-	/* check for data */
-	if (!local_read(&cpu_buffer->reader_page->page->commit))
-		goto out;
+	event = rb_reader_event(cpu_buffer);
+
+	read = reader->read;
+	commit = rb_page_commit(reader);
+
 	/*
-	 * If the writer is already off of the read page, then simply
-	 * switch the read page with the given page. Otherwise
-	 * we need to copy the data from the reader to the writer.
+	 * If this page has been partially read or
+	 * if len is not big enough to read the rest of the page or
+	 * a writer is still on the page, then
+	 * we must copy the data from the page to the buffer.
+	 * Otherwise, we can simply swap the page with the one passed in.
 	 */
-	if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
-		unsigned int read = cpu_buffer->reader_page->read;
+	if (read || (len < (commit - read)) ||
+	    cpu_buffer->reader_page == cpu_buffer->commit_page) {
+		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
+		unsigned int rpos = read;
+		unsigned int pos = 0;
+		unsigned int size;
 
 		if (full)
 			goto out;
-		/* The writer is still on the reader page, we must copy */
-		bpage = cpu_buffer->reader_page->page;
-		memcpy(bpage->data,
-		       cpu_buffer->reader_page->page->data + read,
-		       local_read(&bpage->commit) - read);
 
-		/* consume what was read */
-		cpu_buffer->reader_page += read;
+		if (len > (commit - read))
+			len = (commit - read);
 
+		size = rb_event_length(event);
+
+		if (len < size)
+			goto out;
+
+		/* save the current timestamp, since the user will need it */
+		save_timestamp = cpu_buffer->read_stamp;
+
+		/* Need to copy one event at a time */
+		do {
+			memcpy(bpage->data + pos, rpage->data + rpos, size);
+
+			len -= size;
+
+			rb_advance_reader(cpu_buffer);
+			rpos = reader->read;
+			pos += size;
+
+			event = rb_reader_event(cpu_buffer);
+			size = rb_event_length(event);
+		} while (len > size);
+
+		/* update bpage */
+		local_set(&bpage->commit, pos);
+		bpage->time_stamp = save_timestamp;
+
+		/* we copied everything to the beginning */
+		read = 0;
 	} else {
 		/* swap the pages */
 		rb_init_page(bpage);
-		bpage = cpu_buffer->reader_page->page;
-		cpu_buffer->reader_page->page = *data_page;
-		cpu_buffer->reader_page->read = 0;
+		bpage = reader->page;
+		reader->page = *data_page;
+		local_set(&reader->write, 0);
+		reader->read = 0;
 		*data_page = bpage;
+
+		/* update the entry counter */
+		rb_remove_entries(cpu_buffer, bpage, read);
 	}
-	ret = 1;
+	ret = read;
 
-	/* update the entry counter */
-	rb_remove_entries(cpu_buffer, bpage);
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
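With the new signature, a caller passes the size of its destination page and treats a non-negative return value as the offset of the first unconsumed event in the returned page. A hedged expansion of the kerneldoc example above (process_page is the kerneldoc's own placeholder; len of PAGE_SIZE is an assumption):

	/* Hypothetical consumer following the updated kerneldoc example. */
	void *rpage = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!rpage)
		return -ENOMEM;
	ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		/* valid events start at offset 'ret' within the page's data */
		process_page(rpage, ret);
	ring_buffer_free_read_page(buffer, rpage);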
@@ -2466,7 +2563,7 @@ static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
 	       size_t cnt, loff_t *ppos)
 {
-	long *p = filp->private_data;
+	unsigned long *p = filp->private_data;
 	char buf[64];
 	int r;
 
@@ -2482,9 +2579,9 @@ static ssize_t
 rb_simple_write(struct file *filp, const char __user *ubuf,
 		size_t cnt, loff_t *ppos)
 {
-	long *p = filp->private_data;
+	unsigned long *p = filp->private_data;
 	char buf[64];
-	long val;
+	unsigned long val;
 	int ret;
 
 	if (cnt >= sizeof(buf))
@@ -2509,7 +2606,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-static struct file_operations rb_simple_fops = {
+static const struct file_operations rb_simple_fops = {
 	.open		= tracing_open_generic,
 	.read		= rb_simple_read,
 	.write		= rb_simple_write,