Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--	kernel/trace/ring_buffer.c	780
1 files changed, 509 insertions, 271 deletions

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf44c844..dc4dc70171ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -10,6 +10,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
@@ -22,6 +23,28 @@
 #include "trace.h"
 
 /*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_seq_printf(s, "# compressed entry header\n");
+	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
+	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+			       RINGBUF_TYPE_PADDING);
+	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+			       RINGBUF_TYPE_TIME_EXTEND);
+	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+
+	return ret;
+}
+
+/*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
  * associated with the CPU it is currently executing on. A reader may read
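
The printout above is kept in sync by hand with the event descriptor in include/linux/ring_buffer.h. In sketch form (field widths taken from the printed header, with the kernel's kmemcheck bitfield annotations omitted, and an illustrative struct name), the layout it describes is:

/* Sketch of the compressed event header the function above prints. */
struct ring_buffer_event_sketch {
	unsigned int type_len:5;	/* 0 and 1..28 mean data; 29..31 are specials */
	unsigned int time_delta:27;	/* offset from the previous event's timestamp */
	unsigned int array[];		/* payload; array[0] holds the byte length
					 * when type_len == 0 */
};
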
@@ -182,7 +205,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	28
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
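
The new RB_MAX_SMALL_DATA is the same 112 bytes as before, just derived: type_len values 1..28 encode the payload length in RB_ALIGNMENT (4-byte) words, so 28 * 4 = 112 is the largest payload the header can describe inline. The '0 ... 28' definition uses GCC's case-range extension so all data events can be matched with a single 'case RINGBUF_TYPE_DATA:'. A hedged decode sketch against the struct sketched earlier:

/* Decode the payload length of a data event (sketch; 4 == RB_ALIGNMENT). */
static unsigned int sketch_data_length(const struct ring_buffer_event_sketch *e)
{
	if (e->type_len)	/* 1..28: length packed into the header itself */
		return e->type_len * 4;
	return e->array[0];	/* 0: too large, length spilled into array[0] */
}
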
@@ -191,48 +217,28 @@ enum {
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+	return event->type_len == RINGBUF_TYPE_PADDING
+			&& event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+	return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->len)
-		length = event->len * RB_ALIGNMENT;
+	if (event->type_len)
+		length = event->type_len * RB_ALIGNMENT;
 	else
 		length = event->array[0];
 	return length + RB_EVNT_HDR_SIZE;
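
Note how padding does double duty in rb_null_event() and rb_discarded_event() above: a zero time_delta marks dead space at the end of a page, while a nonzero time_delta marks a discarded event that still occupies the bytes recorded in array[0]. With the 5-bit type_len, RINGBUF_TYPE_PADDING is 29, the first value past the 28 inline data lengths (an inference from the enum layout, not spelled out in this hunk):

/* Padding carries two meanings, told apart by time_delta (sketch). */
static int sketch_is_null_padding(const struct ring_buffer_event_sketch *e)
{
	return e->type_len == 29 && e->time_delta == 0;	/* end-of-page filler */
}

static int sketch_is_discarded(const struct ring_buffer_event_sketch *e)
{
	return e->type_len == 29 && e->time_delta != 0;	/* keeps array[0] bytes */
}
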
@@ -242,12 +248,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			/* undefined */
 			return -1;
-		return rb_event_data_length(event);
+		return event->array[0] + RB_EVNT_HDR_SIZE;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		return RB_LEN_TIME_EXTEND;
@@ -271,7 +277,7 @@ rb_event_length(struct ring_buffer_event *event)
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length = rb_event_length(event);
-	if (event->type != RINGBUF_TYPE_DATA)
+	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +290,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
-	if (event->len)
+	if (event->type_len)
 		return (void *)&event->array[0];
 	/* Otherwise length is in array[0] and array[1] has the data */
 	return (void *)&event->array[1];
@@ -316,9 +322,10 @@ struct buffer_data_page {
 };
 
 struct buffer_page {
+	struct list_head list;		/* list of buffer pages */
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
-	struct list_head list;		/* list of free pages */
+	local_t		 entries;	/* entries on this page */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -361,6 +368,34 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+	struct buffer_data_page field;
+	int ret;
+
+	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+			       "offset:0;\tsize:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp));
+
+	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       (unsigned int)sizeof(field.commit));
+
+	ret = trace_seq_printf(s, "\tfield: char data;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), data),
+			       (unsigned int)BUF_PAGE_SIZE);
+
+	return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
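
ring_buffer_print_page_header() added above advertises the fixed header at the start of every data page. As a sketch (local_t is a long underneath; the struct name here is illustrative):

/* Per-page header the printout above describes; events fill data[]. */
struct buffer_data_page_sketch {
	unsigned long long time_stamp;	/* timestamp of the first event on the page */
	long		   commit;	/* local_t: bytes of committed event data */
	unsigned char	   data[];	/* BUF_PAGE_SIZE bytes of packed events */
};

BUF_MAX_DATA_SIZE then reserves another sizeof(u32) * 2 = 8 bytes, the worst-case event header (the bitfield word plus the array[0] length word), so a maximal event still fits on one page.
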
@@ -375,8 +410,11 @@ struct ring_buffer_per_cpu {
 	struct buffer_page	*tail_page;	/* write to tail */
 	struct buffer_page	*commit_page;	/* committed pages */
 	struct buffer_page	*reader_page;
+	unsigned long		nmi_dropped;
+	unsigned long		commit_overrun;
 	unsigned long		overrun;
-	unsigned long		entries;
+	unsigned long		read;
+	local_t			entries;
 	u64			write_stamp;
 	u64			read_stamp;
 	atomic_t		record_disabled;
@@ -389,6 +427,8 @@ struct ring_buffer {
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -420,13 +460,18 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
-	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = buffer->clock() << DEBUG_SHIFT;
+	time = rb_time_stamp(buffer, cpu);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -523,6 +568,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -593,7 +639,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -616,6 +663,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -673,7 +721,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -947,31 +995,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
 	return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	for (head = 0; head < rb_head_size(cpu_buffer);
-	     head += rb_event_length(event)) {
-
-		event = __rb_page_index(cpu_buffer->head_page, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
-			continue;
-		cpu_buffer->overrun++;
-		cpu_buffer->entries--;
-	}
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 			       struct buffer_page **bpage)
 {
@@ -991,7 +1014,7 @@ rb_event_index(struct ring_buffer_event *event)
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -1110,28 +1133,21 @@ static void
 rb_update_event(struct ring_buffer_event *event,
 		unsigned type, unsigned length)
 {
-	event->type = type;
+	event->type_len = type;
 
 	switch (type) {
 
 	case RINGBUF_TYPE_PADDING:
-		break;
-
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-		break;
-
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
-	case RINGBUF_TYPE_DATA:
+	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA) {
-			event->len = 0;
+		if (length > RB_MAX_SMALL_DATA)
 			event->array[0] = length;
-		} else
-			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+		else
+			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
@@ -1155,131 +1171,157 @@ static unsigned rb_calculate_event_length(unsigned length)
 	return length;
 }
 
+
 static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-		  unsigned type, unsigned long length, u64 *ts)
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	     unsigned long length, unsigned long tail,
+	     struct buffer_page *commit_page,
+	     struct buffer_page *tail_page, u64 *ts)
 {
-	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-	unsigned long tail, write;
+	struct buffer_page *next_page, *head_page, *reader_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
-	unsigned long flags;
 	bool lock_taken = false;
+	unsigned long flags;
 
-	commit_page = cpu_buffer->commit_page;
-	/* we just need to protect against interrupts */
-	barrier();
-	tail_page = cpu_buffer->tail_page;
-	write = local_add_return(length, &tail_page->write);
-	tail = write - length;
+	next_page = tail_page;
 
-	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE) {
-		struct buffer_page *next_page = tail_page;
+	local_irq_save(flags);
+	/*
+	 * Since the write to the buffer is still not
+	 * fully lockless, we must be careful with NMIs.
+	 * The locks in the writers are taken when a write
+	 * crosses to a new page. The locks protect against
+	 * races with the readers (this will soon be fixed
+	 * with a lockless solution).
+	 *
+	 * Because we can not protect against NMIs, and we
+	 * want to keep traces reentrant, we need to manage
+	 * what happens when we are in an NMI.
+	 *
+	 * NMIs can happen after we take the lock.
+	 * If we are in an NMI, only take the lock
+	 * if it is not already taken. Otherwise
+	 * simply fail.
+	 */
+	if (unlikely(in_nmi())) {
+		if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+			cpu_buffer->nmi_dropped++;
+			goto out_reset;
+		}
+	} else
+		__raw_spin_lock(&cpu_buffer->lock);
 
-		local_irq_save(flags);
-		/*
-		 * Since the write to the buffer is still not
-		 * fully lockless, we must be careful with NMIs.
-		 * The locks in the writers are taken when a write
-		 * crosses to a new page. The locks protect against
-		 * races with the readers (this will soon be fixed
-		 * with a lockless solution).
-		 *
-		 * Because we can not protect against NMIs, and we
-		 * want to keep traces reentrant, we need to manage
-		 * what happens when we are in an NMI.
-		 *
-		 * NMIs can happen after we take the lock.
-		 * If we are in an NMI, only take the lock
-		 * if it is not already taken. Otherwise
-		 * simply fail.
-		 */
-		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
-				goto out_reset;
-		} else
-			__raw_spin_lock(&cpu_buffer->lock);
+	lock_taken = true;
 
-		lock_taken = true;
+	rb_inc_page(cpu_buffer, &next_page);
 
-		rb_inc_page(cpu_buffer, &next_page);
+	head_page = cpu_buffer->head_page;
+	reader_page = cpu_buffer->reader_page;
 
-		head_page = cpu_buffer->head_page;
-		reader_page = cpu_buffer->reader_page;
+	/* we grabbed the lock before incrementing */
+	if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+		goto out_reset;
 
-		/* we grabbed the lock before incrementing */
-		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-			goto out_reset;
+	/*
+	 * If for some reason, we had an interrupt storm that made
+	 * it all the way around the buffer, bail, and warn
+	 * about it.
+	 */
+	if (unlikely(next_page == commit_page)) {
+		cpu_buffer->commit_overrun++;
+		goto out_reset;
+	}
 
-		/*
-		 * If for some reason, we had an interrupt storm that made
-		 * it all the way around the buffer, bail, and warn
-		 * about it.
-		 */
-		if (unlikely(next_page == commit_page)) {
-			WARN_ON_ONCE(1);
+	if (next_page == head_page) {
+		if (!(buffer->flags & RB_FL_OVERWRITE))
 			goto out_reset;
-		}
 
-		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
-				goto out_reset;
-
-			/* tail_page has not moved yet? */
-			if (tail_page == cpu_buffer->tail_page) {
-				/* count overflows */
-				rb_update_overflow(cpu_buffer);
+		/* tail_page has not moved yet? */
+		if (tail_page == cpu_buffer->tail_page) {
+			/* count overflows */
+			cpu_buffer->overrun +=
+				local_read(&head_page->entries);
 
 			rb_inc_page(cpu_buffer, &head_page);
 			cpu_buffer->head_page = head_page;
 			cpu_buffer->head_page->read = 0;
-			}
 		}
+	}
 
 	/*
 	 * If the tail page is still the same as what we think
 	 * it is, then it is up to us to update the tail
 	 * pointer.
 	 */
 	if (tail_page == cpu_buffer->tail_page) {
 		local_set(&next_page->write, 0);
-		local_set(&next_page->page->commit, 0);
-		cpu_buffer->tail_page = next_page;
+		local_set(&next_page->entries, 0);
+		local_set(&next_page->page->commit, 0);
+		cpu_buffer->tail_page = next_page;
+
+		/* reread the time stamp */
+		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+		cpu_buffer->tail_page->page->time_stamp = *ts;
+	}
 
-		/* reread the time stamp */
-		*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-		cpu_buffer->tail_page->page->time_stamp = *ts;
-	}
+	/*
+	 * The actual tail page has moved forward.
+	 */
+	if (tail < BUF_PAGE_SIZE) {
+		/* Mark the rest of the page with padding */
+		event = __rb_page_index(tail_page, tail);
+		kmemcheck_annotate_bitfield(event, bitfield);
+		rb_event_set_padding(event);
+	}
 
-	/*
-	 * The actual tail page has moved forward.
-	 */
-	if (tail < BUF_PAGE_SIZE) {
-		/* Mark the rest of the page with padding */
-		event = __rb_page_index(tail_page, tail);
-		rb_event_set_padding(event);
-	}
+	/* Set the write back to the previous setting */
+	local_sub(length, &tail_page->write);
 
-	if (tail <= BUF_PAGE_SIZE)
-		/* Set the write back to the previous setting */
-		local_set(&tail_page->write, tail);
+	/*
+	 * If this was a commit entry that failed,
+	 * increment that too
+	 */
+	if (tail_page == cpu_buffer->commit_page &&
+	    tail == rb_commit_index(cpu_buffer)) {
+		rb_set_commit_to_write(cpu_buffer);
+	}
 
-	/*
-	 * If this was a commit entry that failed,
-	 * increment that too
-	 */
-	if (tail_page == cpu_buffer->commit_page &&
-	    tail == rb_commit_index(cpu_buffer)) {
-		rb_set_commit_to_write(cpu_buffer);
-	}
+	__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	/* fail and let the caller try again */
+	return ERR_PTR(-EAGAIN);
+
+ out_reset:
+	/* reset write */
+	local_sub(length, &tail_page->write);
 
+	if (likely(lock_taken))
 		__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
+	return NULL;
+}
 
-	/* fail and let the caller try again */
-	return ERR_PTR(-EAGAIN);
-}
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+		  unsigned type, unsigned long length, u64 *ts)
+{
+	struct buffer_page *tail_page, *commit_page;
+	struct ring_buffer_event *event;
+	unsigned long tail, write;
+
+	commit_page = cpu_buffer->commit_page;
+	/* we just need to protect against interrupts */
+	barrier();
+	tail_page = cpu_buffer->tail_page;
+	write = local_add_return(length, &tail_page->write);
+	tail = write - length;
+
+	/* See if we shot pass the end of this buffer page */
+	if (write > BUF_PAGE_SIZE)
+		return rb_move_tail(cpu_buffer, length, tail,
+				    commit_page, tail_page, ts);
 
 	/* We reserved something on the buffer */
 
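
The nmi_dropped counter and the trylock above encode one rule: a context that may have interrupted the lock holder on its own CPU (an NMI) must never spin on that lock, only trylock and give up. A userspace analogue of the rule, assuming POSIX spinlocks (function and names are illustrative, not kernel API):

#include <pthread.h>
#include <stdbool.h>

/* A context that may have preempted the lock holder may only trylock. */
static bool writer_lock(pthread_spinlock_t *lock, bool nmi_like_context,
			unsigned long *dropped)
{
	if (nmi_like_context) {
		if (pthread_spin_trylock(lock) != 0) {
			++*dropped;	/* mirrors cpu_buffer->nmi_dropped++ */
			return false;	/* caller resets the write and bails */
		}
		return true;
	}
	pthread_spin_lock(lock);	/* ordinary contexts may spin */
	return true;
}
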
@@ -1287,8 +1329,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		return NULL;
 
 	event = __rb_page_index(tail_page, tail);
+	kmemcheck_annotate_bitfield(event, bitfield);
 	rb_update_event(event, type, length);
 
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
+
 	/*
 	 * If this is a commit and the tail is zero, then update
 	 * this page's time stamp.
@@ -1297,16 +1344,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->commit_page->page->time_stamp = *ts;
 
 	return event;
+}
 
- out_reset:
-	/* reset write */
-	if (tail <= BUF_PAGE_SIZE)
-		local_set(&tail_page->write, tail);
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event)
+{
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
 
-	if (likely(lock_taken))
-		__raw_spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
-	return NULL;
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			return 1;
+	}
+
+	/* could not discard */
+	return 0;
 }
 
 static int
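
rb_try_to_discard() above is the new primitive this series leans on: an event can be "unwritten" without taking any lock precisely when it is still the last reservation on the tail page, in which case one compare-and-swap rewinds the page's write index from the event's end back to its start. The idea in isolation, sketched with C11 atomics standing in for local_t:

#include <stdatomic.h>
#include <stdbool.h>

/* Rewind 'write' from the event's end back to its start, but only if no
 * later writer has already advanced past it (sketch of the cmpxchg idea). */
static bool sketch_try_unwrite(atomic_ulong *write,
			       unsigned long start, unsigned long end)
{
	unsigned long expected = end;
	return atomic_compare_exchange_strong(write, &expected, start);
}
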
@@ -1351,16 +1420,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
 			cpu_buffer->commit_page->page->time_stamp = *ts;
-			event->time_delta = 0;
-			event->array[0] = 0;
+			/* try to discard, since we do not need this */
+			if (!rb_try_to_discard(cpu_buffer, event)) {
+				/* nope, just zero it */
+				event->time_delta = 0;
+				event->array[0] = 0;
+			}
 		}
 		cpu_buffer->write_stamp = *ts;
 		/* let the caller know this was the commit */
 		ret = 1;
 	} else {
-		/* Darn, this is just wasted space */
-		event->time_delta = 0;
-		event->array[0] = 0;
+		/* Try to discard the event */
+		if (!rb_try_to_discard(cpu_buffer, event)) {
+			/* Darn, this is just wasted space */
+			event->time_delta = 0;
+			event->array[0] = 0;
+		}
 		ret = 0;
 	}
 
@@ -1371,13 +1447,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned type, unsigned long length)
+		      unsigned long length)
 {
 	struct ring_buffer_event *event;
-	u64 ts, delta;
+	u64 ts, delta = 0;
 	int commit = 0;
 	int nr_loops = 0;
 
+	length = rb_calculate_event_length(length);
  again:
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1468,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		return NULL;
 
-	ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
 	/*
 	 * Only the first commit can update the timestamp.
@@ -1401,23 +1478,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * also be made. But only the entry that did the actual
 	 * commit will be something other than zero.
 	 */
-	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
-	    rb_page_write(cpu_buffer->tail_page) ==
-	    rb_commit_index(cpu_buffer)) {
+	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
+		   rb_page_write(cpu_buffer->tail_page) ==
+		   rb_commit_index(cpu_buffer))) {
+		u64 diff;
 
-		delta = ts - cpu_buffer->write_stamp;
+		diff = ts - cpu_buffer->write_stamp;
 
-		/* make sure this delta is calculated here */
+		/* make sure this diff is calculated here */
 		barrier();
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			delta = 0;
+			goto get_event;
 
-		if (test_time_stamp(delta)) {
+		delta = diff;
+		if (unlikely(test_time_stamp(delta))) {
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
 			if (commit == -EBUSY)
 				return NULL;
 
@@ -1426,12 +1504,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else
-		/* Non commits have zero deltas */
-		delta = 0;
+	}
 
-	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-	if (PTR_ERR(event) == -EAGAIN)
+ get_event:
+	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event) {
@@ -1448,7 +1525,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * If the timestamp was commited, make the commit our entry
 	 * now so that we will update it when needed.
 	 */
-	if (commit)
+	if (unlikely(commit))
 		rb_set_commit_event(cpu_buffer, event);
 	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
@@ -1458,6 +1535,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+#define TRACE_RECURSIVE_DEPTH 16
+
+static int trace_recursive_lock(void)
+{
+	current->trace_recursion++;
+
+	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+		return 0;
+
+	/* Disable all tracing before we do anything else */
+	tracing_off_permanent();
+
+	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+		    current->trace_recursion,
+		    hardirq_count() >> HARDIRQ_SHIFT,
+		    softirq_count() >> SOFTIRQ_SHIFT,
+		    in_nmi());
+
+	WARN_ON_ONCE(1);
+	return -1;
+}
+
+static void trace_recursive_unlock(void)
+{
+	WARN_ON_ONCE(!current->trace_recursion);
+
+	current->trace_recursion--;
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
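
trace_recursive_lock() above is not a lock but a per-task depth counter: each reservation bumps current->trace_recursion, and once nesting exceeds 16 levels tracing is disabled permanently instead of letting a recursing tracer consume the buffer. The shape of the guard as a thread-local sketch (the kernel version also warns and reports hardirq/softirq/NMI state):

#define SKETCH_RECURSIVE_DEPTH 16

/* Stand-in for current->trace_recursion; one counter per task/thread. */
static __thread unsigned long sketch_recursion;

static int sketch_recursive_lock(void)
{
	if (++sketch_recursion < SKETCH_RECURSIVE_DEPTH)
		return 0;
	/* the kernel also calls tracing_off_permanent() and warns here */
	return -1;
}

static void sketch_recursive_unlock(void)
{
	sketch_recursion--;
}
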
@@ -1491,6 +1598,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (trace_recursive_lock())
+		goto out_nocheck;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1611,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	length = rb_calculate_event_length(length);
-	if (length > BUF_PAGE_SIZE)
+	if (length > BUF_MAX_DATA_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1520,6 +1629,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	return event;
 
  out:
+	trace_recursive_unlock();
+
+ out_nocheck:
 	ftrace_preempt_enable(resched);
 	return NULL;
 }
@@ -1528,7 +1640,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->entries++;
+	local_inc(&cpu_buffer->entries);
 
 	/* Only process further if we own the commit */
 	if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1670,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
@@ -1570,6 +1684,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+	/* array[0] holds the actual length for the discarded event */
+	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+	rb_event_discard(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* The event is discarded regardless */
+	rb_event_discard(event);
+
+	/*
+	 * This must only be called if the event has not been
+	 * committed yet. Thus we can assume that preemption
+	 * is still disabled.
+	 */
+	RB_WARN_ON(buffer, preemptible());
+
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (!rb_try_to_discard(cpu_buffer, event))
+		goto out;
+
+	/*
+	 * The commit is still visible by the reader, so we
+	 * must increment entries.
+	 */
+	local_inc(&cpu_buffer->entries);
+ out:
+	/*
+	 * If a write came in and pushed the tail page
+	 * we still need to update the commit pointer
+	 * if we were the commit.
+	 */
+	if (rb_is_commit(cpu_buffer, event))
+		rb_set_commit_to_write(cpu_buffer);
+
+	trace_recursive_unlock();
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1)
+		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+	else
+		preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
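
ring_buffer_discard_commit() is meant for writers that only learn after reserving, typically from event filtering, that the record should not survive; unlike the plain discard it can hand the space back via rb_try_to_discard() when nothing was written behind it. A hedged usage sketch, where struct my_entry and filter_rejects() are made-up stand-ins:

/* Reserve, fill, then either commit or discard-commit (sketch). */
static void write_filtered_event(struct ring_buffer *buffer, int value)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;			/* hypothetical payload type */

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->value = value;

	if (filter_rejects(entry))		/* hypothetical predicate */
		ring_buffer_discard_commit(buffer, event);
	else
		ring_buffer_unlock_commit(buffer, event);
}
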
@@ -1589,7 +1796,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
 	int cpu, resched;
@@ -1612,9 +1818,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer,
-				      RINGBUF_TYPE_DATA, event_length);
+	if (length > BUF_MAX_DATA_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1728,7 +1935,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	ret = cpu_buffer->entries;
+	ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+		- cpu_buffer->read;
 
 	return ret;
 }
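
The rework also changes what a "count of entries" means: writers only increment the local_t entries, readers only increment read, and overwrite only grows overrun, so the readable count becomes a subtraction over three monotonic counters instead of a shared counter that everyone decrements. For example:

#include <assert.h>

/* entries_cpu above computes: written - overwritten - consumed. */
static unsigned long sketch_entries_cpu(unsigned long written,
					unsigned long overrun,
					unsigned long read)
{
	return (written - overrun) - read;
}

int main(void)
{
	/* 100 events written, 10 lost to overwrite, 25 already consumed */
	assert(sketch_entries_cpu(100, 10, 25) == 65);
	return 0;
}
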
@@ -1755,6 +1963,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
1755 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 1963 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
1756 | 1964 | ||
1757 | /** | 1965 | /** |
1966 | * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped | ||
1967 | * @buffer: The ring buffer | ||
1968 | * @cpu: The per CPU buffer to get the number of overruns from | ||
1969 | */ | ||
1970 | unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) | ||
1971 | { | ||
1972 | struct ring_buffer_per_cpu *cpu_buffer; | ||
1973 | unsigned long ret; | ||
1974 | |||
1975 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
1976 | return 0; | ||
1977 | |||
1978 | cpu_buffer = buffer->buffers[cpu]; | ||
1979 | ret = cpu_buffer->nmi_dropped; | ||
1980 | |||
1981 | return ret; | ||
1982 | } | ||
1983 | EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); | ||
1984 | |||
1985 | /** | ||
1986 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | ||
1987 | * @buffer: The ring buffer | ||
1988 | * @cpu: The per CPU buffer to get the number of overruns from | ||
1989 | */ | ||
1990 | unsigned long | ||
1991 | ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | ||
1992 | { | ||
1993 | struct ring_buffer_per_cpu *cpu_buffer; | ||
1994 | unsigned long ret; | ||
1995 | |||
1996 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
1997 | return 0; | ||
1998 | |||
1999 | cpu_buffer = buffer->buffers[cpu]; | ||
2000 | ret = cpu_buffer->commit_overrun; | ||
2001 | |||
2002 | return ret; | ||
2003 | } | ||
2004 | EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); | ||
2005 | |||
2006 | /** | ||
1758 | * ring_buffer_entries - get the number of entries in a buffer | 2007 | * ring_buffer_entries - get the number of entries in a buffer |
1759 | * @buffer: The ring buffer | 2008 | * @buffer: The ring buffer |
1760 | * | 2009 | * |
@@ -1770,7 +2019,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
1770 | /* if you care about this being correct, lock the buffer */ | 2019 | /* if you care about this being correct, lock the buffer */ |
1771 | for_each_buffer_cpu(buffer, cpu) { | 2020 | for_each_buffer_cpu(buffer, cpu) { |
1772 | cpu_buffer = buffer->buffers[cpu]; | 2021 | cpu_buffer = buffer->buffers[cpu]; |
1773 | entries += cpu_buffer->entries; | 2022 | entries += (local_read(&cpu_buffer->entries) - |
2023 | cpu_buffer->overrun) - cpu_buffer->read; | ||
1774 | } | 2024 | } |
1775 | 2025 | ||
1776 | return entries; | 2026 | return entries; |
@@ -1862,7 +2112,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1862 | { | 2112 | { |
1863 | u64 delta; | 2113 | u64 delta; |
1864 | 2114 | ||
1865 | switch (event->type) { | 2115 | switch (event->type_len) { |
1866 | case RINGBUF_TYPE_PADDING: | 2116 | case RINGBUF_TYPE_PADDING: |
1867 | return; | 2117 | return; |
1868 | 2118 | ||
@@ -1893,7 +2143,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, | |||
1893 | { | 2143 | { |
1894 | u64 delta; | 2144 | u64 delta; |
1895 | 2145 | ||
1896 | switch (event->type) { | 2146 | switch (event->type_len) { |
1897 | case RINGBUF_TYPE_PADDING: | 2147 | case RINGBUF_TYPE_PADDING: |
1898 | return; | 2148 | return; |
1899 | 2149 | ||
@@ -1966,6 +2216,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
1966 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 2216 | cpu_buffer->reader_page->list.prev = reader->list.prev; |
1967 | 2217 | ||
1968 | local_set(&cpu_buffer->reader_page->write, 0); | 2218 | local_set(&cpu_buffer->reader_page->write, 0); |
2219 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
1969 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2220 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
1970 | 2221 | ||
1971 | /* Make the reader page now replace the head */ | 2222 | /* Make the reader page now replace the head */ |
@@ -2008,8 +2259,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
2008 | 2259 | ||
2009 | event = rb_reader_event(cpu_buffer); | 2260 | event = rb_reader_event(cpu_buffer); |
2010 | 2261 | ||
2011 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) | 2262 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
2012 | cpu_buffer->entries--; | 2263 | || rb_discarded_event(event)) |
2264 | cpu_buffer->read++; | ||
2013 | 2265 | ||
2014 | rb_update_read_stamp(cpu_buffer, event); | 2266 | rb_update_read_stamp(cpu_buffer, event); |
2015 | 2267 | ||
@@ -2031,8 +2283,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2031 | * Check if we are at the end of the buffer. | 2283 | * Check if we are at the end of the buffer. |
2032 | */ | 2284 | */ |
2033 | if (iter->head >= rb_page_size(iter->head_page)) { | 2285 | if (iter->head >= rb_page_size(iter->head_page)) { |
2034 | if (RB_WARN_ON(buffer, | 2286 | /* discarded commits can make the page empty */ |
2035 | iter->head_page == cpu_buffer->commit_page)) | 2287 | if (iter->head_page == cpu_buffer->commit_page) |
2036 | return; | 2288 | return; |
2037 | rb_inc_iter(iter); | 2289 | rb_inc_iter(iter); |
2038 | return; | 2290 | return; |
@@ -2075,12 +2327,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2075 | /* | 2327 | /* |
2076 | * We repeat when a timestamp is encountered. It is possible | 2328 | * We repeat when a timestamp is encountered. It is possible |
2077 | * to get multiple timestamps from an interrupt entering just | 2329 | * to get multiple timestamps from an interrupt entering just |
2078 | * as one timestamp is about to be written. The max times | 2330 | * as one timestamp is about to be written, or from discarded |
2079 | * that this can happen is the number of nested interrupts we | 2331 | * commits. The most that we can have is the number on a single page. |
2080 | * can have. Nesting 10 deep of interrupts is clearly | ||
2081 | * an anomaly. | ||
2082 | */ | 2332 | */ |
2083 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2333 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2084 | return NULL; | 2334 | return NULL; |
2085 | 2335 | ||
2086 | reader = rb_get_reader_page(cpu_buffer); | 2336 | reader = rb_get_reader_page(cpu_buffer); |
@@ -2089,7 +2339,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2089 | 2339 | ||
2090 | event = rb_reader_event(cpu_buffer); | 2340 | event = rb_reader_event(cpu_buffer); |
2091 | 2341 | ||
2092 | switch (event->type) { | 2342 | switch (event->type_len) { |
2093 | case RINGBUF_TYPE_PADDING: | 2343 | case RINGBUF_TYPE_PADDING: |
2094 | if (rb_null_event(event)) | 2344 | if (rb_null_event(event)) |
2095 | RB_WARN_ON(cpu_buffer, 1); | 2345 | RB_WARN_ON(cpu_buffer, 1); |
@@ -2146,14 +2396,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2146 | 2396 | ||
2147 | again: | 2397 | again: |
2148 | /* | 2398 | /* |
2149 | * We repeat when a timestamp is encountered. It is possible | 2399 | * We repeat when a timestamp is encountered. |
2150 | * to get multiple timestamps from an interrupt entering just | 2400 | * We can get multiple timestamps by nested interrupts or also |
2151 | * as one timestamp is about to be written. The max times | 2401 | * if filtering is on (discarding commits). Since discarding |
2152 | * that this can happen is the number of nested interrupts we | 2402 | * commits can be frequent we can get a lot of timestamps. |
2153 | * can have. Nesting 10 deep of interrupts is clearly | 2403 | * But we limit them by not adding timestamps if they begin |
2154 | * an anomaly. | 2404 | * at the start of a page. |
2155 | */ | 2405 | */ |
2156 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2406 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2157 | return NULL; | 2407 | return NULL; |
2158 | 2408 | ||
2159 | if (rb_per_cpu_empty(cpu_buffer)) | 2409 | if (rb_per_cpu_empty(cpu_buffer)) |
@@ -2161,7 +2411,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2161 | 2411 | ||
2162 | event = rb_iter_head_event(iter); | 2412 | event = rb_iter_head_event(iter); |
2163 | 2413 | ||
2164 | switch (event->type) { | 2414 | switch (event->type_len) { |
2165 | case RINGBUF_TYPE_PADDING: | 2415 | case RINGBUF_TYPE_PADDING: |
2166 | if (rb_null_event(event)) { | 2416 | if (rb_null_event(event)) { |
2167 | rb_inc_iter(iter); | 2417 | rb_inc_iter(iter); |
@@ -2220,7 +2470,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2220 | event = rb_buffer_peek(buffer, cpu, ts); | 2470 | event = rb_buffer_peek(buffer, cpu, ts); |
2221 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2471 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2222 | 2472 | ||
2223 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2473 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2224 | cpu_relax(); | 2474 | cpu_relax(); |
2225 | goto again; | 2475 | goto again; |
2226 | } | 2476 | } |
@@ -2248,7 +2498,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2248 | event = rb_iter_peek(iter, ts); | 2498 | event = rb_iter_peek(iter, ts); |
2249 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2499 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2250 | 2500 | ||
2251 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2501 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2252 | cpu_relax(); | 2502 | cpu_relax(); |
2253 | goto again; | 2503 | goto again; |
2254 | } | 2504 | } |
@@ -2293,7 +2543,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2293 | out: | 2543 | out: |
2294 | preempt_enable(); | 2544 | preempt_enable(); |
2295 | 2545 | ||
2296 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2546 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2297 | cpu_relax(); | 2547 | cpu_relax(); |
2298 | goto again; | 2548 | goto again; |
2299 | } | 2549 | } |
@@ -2386,7 +2636,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
2386 | out: | 2636 | out: |
2387 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2637 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2388 | 2638 | ||
2389 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2639 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2390 | cpu_relax(); | 2640 | cpu_relax(); |
2391 | goto again; | 2641 | goto again; |
2392 | } | 2642 | } |
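[Editor's note] All four read-side entry points (ring_buffer_peek, ring_buffer_iter_peek, ring_buffer_consume, ring_buffer_read) now share the same retry: a filtered-out event is left in place as RINGBUF_TYPE_PADDING with a non-zero time_delta, and the reader must step over it rather than hand it to the caller. A sketch of the shared pattern with locking elided; as the hunks above show, rb_buffer_peek() is really called under reader_lock:

	/* Sketch: retry-on-padding pattern shared by the read side. */
	static struct ring_buffer_event *
	rb_peek_skip_padding(struct ring_buffer *buffer, int cpu, u64 *ts)
	{
		struct ring_buffer_event *event;

	 again:
		event = rb_buffer_peek(buffer, cpu, ts);

		if (event && event->type_len == RINGBUF_TYPE_PADDING) {
			cpu_relax();	/* let an in-flight writer finish */
			goto again;
		}
		return event;
	}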
@@ -2411,6 +2661,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2411 | cpu_buffer->head_page | 2661 | cpu_buffer->head_page |
2412 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 2662 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); |
2413 | local_set(&cpu_buffer->head_page->write, 0); | 2663 | local_set(&cpu_buffer->head_page->write, 0); |
2664 | local_set(&cpu_buffer->head_page->entries, 0); | ||
2414 | local_set(&cpu_buffer->head_page->page->commit, 0); | 2665 | local_set(&cpu_buffer->head_page->page->commit, 0); |
2415 | 2666 | ||
2416 | cpu_buffer->head_page->read = 0; | 2667 | cpu_buffer->head_page->read = 0; |
@@ -2420,11 +2671,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2420 | 2671 | ||
2421 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 2672 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
2422 | local_set(&cpu_buffer->reader_page->write, 0); | 2673 | local_set(&cpu_buffer->reader_page->write, 0); |
2674 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
2423 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2675 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2424 | cpu_buffer->reader_page->read = 0; | 2676 | cpu_buffer->reader_page->read = 0; |
2425 | 2677 | ||
2678 | cpu_buffer->nmi_dropped = 0; | ||
2679 | cpu_buffer->commit_overrun = 0; | ||
2426 | cpu_buffer->overrun = 0; | 2680 | cpu_buffer->overrun = 0; |
2427 | cpu_buffer->entries = 0; | 2681 | cpu_buffer->read = 0; |
2682 | local_set(&cpu_buffer->entries, 0); | ||
2428 | 2683 | ||
2429 | cpu_buffer->write_stamp = 0; | 2684 | cpu_buffer->write_stamp = 0; |
2430 | cpu_buffer->read_stamp = 0; | 2685 | cpu_buffer->read_stamp = 0; |
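[Editor's note] rb_reset_cpu() now clears the counters this patch introduces or repurposes: per-page entry counts plus new per-cpu statistics. A hedged sketch of the assumed per-cpu bookkeeping, shown as a standalone struct since the full ring_buffer_per_cpu layout is not in this excerpt:

	/* Sketch: per-cpu statistics the reset must now zero (assumed). */
	struct rb_cpu_stats_sketch {
		unsigned long	nmi_dropped;	/* writes lost to NMI lock contention */
		unsigned long	commit_overrun;	/* wrap reached the pending commit page */
		unsigned long	overrun;	/* entries overwritten before being read */
		unsigned long	read;		/* entries handed off to the read side */
		local_t		entries;	/* entries written; lock-free local_t */
	};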
@@ -2443,6 +2698,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
2443 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2698 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2444 | return; | 2699 | return; |
2445 | 2700 | ||
2701 | atomic_inc(&cpu_buffer->record_disabled); | ||
2702 | |||
2446 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2703 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2447 | 2704 | ||
2448 | __raw_spin_lock(&cpu_buffer->lock); | 2705 | __raw_spin_lock(&cpu_buffer->lock); |
@@ -2452,6 +2709,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
2452 | __raw_spin_unlock(&cpu_buffer->lock); | 2709 | __raw_spin_unlock(&cpu_buffer->lock); |
2453 | 2710 | ||
2454 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2711 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2712 | |||
2713 | atomic_dec(&cpu_buffer->record_disabled); | ||
2455 | } | 2714 | } |
2456 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); | 2715 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); |
2457 | 2716 | ||
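[Editor's note] ring_buffer_reset_cpu() now parks writers before rewinding the page lists: bumping record_disabled makes the reserve path bail out, so no commit can race the reset. A sketch of the writer-side gate this relies on; the helper name is hypothetical, but the check mirrors the reserve path in this file:

	/* Sketch: why atomic_inc(&record_disabled) is sufficient. */
	static inline int rb_writer_gated(struct ring_buffer_per_cpu *cpu_buffer)
	{
		/* the reserve path checks this before starting any commit */
		return atomic_read(&cpu_buffer->record_disabled);
	}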
@@ -2578,28 +2837,6 @@ out: | |||
2578 | } | 2837 | } |
2579 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 2838 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
2580 | 2839 | ||
2581 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | ||
2582 | struct buffer_data_page *bpage, | ||
2583 | unsigned int offset) | ||
2584 | { | ||
2585 | struct ring_buffer_event *event; | ||
2586 | unsigned long head; | ||
2587 | |||
2588 | __raw_spin_lock(&cpu_buffer->lock); | ||
2589 | for (head = offset; head < local_read(&bpage->commit); | ||
2590 | head += rb_event_length(event)) { | ||
2591 | |||
2592 | event = __rb_data_page_index(bpage, head); | ||
2593 | if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) | ||
2594 | return; | ||
2595 | /* Only count data entries */ | ||
2596 | if (event->type != RINGBUF_TYPE_DATA) | ||
2597 | continue; | ||
2598 | cpu_buffer->entries--; | ||
2599 | } | ||
2600 | __raw_spin_unlock(&cpu_buffer->lock); | ||
2601 | } | ||
2602 | |||
2603 | /** | 2840 | /** |
2604 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 2841 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
2605 | * @buffer: the buffer to allocate for. | 2842 | * @buffer: the buffer to allocate for. |
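[Editor's note] rb_remove_entries() walked every event on the swapped-out page under cpu_buffer->lock just to keep the entry count honest, an O(events) cost per page. It can be deleted because each buffer_page now carries its own entries counter, bumped with local_inc() at commit time and consumed in a single addition by ring_buffer_read_page() below. A hedged sketch of the assumed page layout:

	/* Sketch: assumed buffer_page layout after this patch. */
	struct buffer_page {
		struct list_head list;		/* list of buffer pages */
		local_t		 write;		/* index for next write */
		unsigned	 read;		/* index for next read */
		local_t		 entries;	/* NEW: events committed to this page */
		struct buffer_data_page *page;	/* the actual data page */
	};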
@@ -2630,6 +2867,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | |||
2630 | 2867 | ||
2631 | return bpage; | 2868 | return bpage; |
2632 | } | 2869 | } |
2870 | EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); | ||
2633 | 2871 | ||
2634 | /** | 2872 | /** |
2635 | * ring_buffer_free_read_page - free an allocated read page | 2873 | * ring_buffer_free_read_page - free an allocated read page |
@@ -2642,6 +2880,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2642 | { | 2880 | { |
2643 | free_page((unsigned long)data); | 2881 | free_page((unsigned long)data); |
2644 | } | 2882 | } |
2883 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | ||
2645 | 2884 | ||
2646 | /** | 2885 | /** |
2647 | * ring_buffer_read_page - extract a page from the ring buffer | 2886 | * ring_buffer_read_page - extract a page from the ring buffer |
@@ -2768,16 +3007,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
2768 | /* we copied everything to the beginning */ | 3007 | /* we copied everything to the beginning */ |
2769 | read = 0; | 3008 | read = 0; |
2770 | } else { | 3009 | } else { |
3010 | /* update the entry counter */ | ||
3011 | cpu_buffer->read += local_read(&reader->entries); | ||
3012 | |||
2771 | /* swap the pages */ | 3013 | /* swap the pages */ |
2772 | rb_init_page(bpage); | 3014 | rb_init_page(bpage); |
2773 | bpage = reader->page; | 3015 | bpage = reader->page; |
2774 | reader->page = *data_page; | 3016 | reader->page = *data_page; |
2775 | local_set(&reader->write, 0); | 3017 | local_set(&reader->write, 0); |
3018 | local_set(&reader->entries, 0); | ||
2776 | reader->read = 0; | 3019 | reader->read = 0; |
2777 | *data_page = bpage; | 3020 | *data_page = bpage; |
2778 | |||
2779 | /* update the entry counter */ | ||
2780 | rb_remove_entries(cpu_buffer, bpage, read); | ||
2781 | } | 3021 | } |
2782 | ret = read; | 3022 | ret = read; |
2783 | 3023 | ||
@@ -2787,6 +3027,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
2787 | out: | 3027 | out: |
2788 | return ret; | 3028 | return ret; |
2789 | } | 3029 | } |
3030 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | ||
2790 | 3031 | ||
2791 | static ssize_t | 3032 | static ssize_t |
2792 | rb_simple_read(struct file *filp, char __user *ubuf, | 3033 | rb_simple_read(struct file *filp, char __user *ubuf, |
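[Editor's note] With ring_buffer_alloc_read_page(), ring_buffer_free_read_page() and ring_buffer_read_page() all gaining EXPORT_SYMBOL_GPL in this patch, a module can drain complete pages without copying events one by one. A hedged usage sketch; the signatures match this file, error handling is trimmed, and what to do with the drained data is left open:

	#include <linux/ring_buffer.h>

	/* Sketch: drain one complete page from 'buffer' for the given cpu. */
	static void rb_dump_one_page(struct ring_buffer *buffer, int cpu)
	{
		void *page;
		int ret;

		page = ring_buffer_alloc_read_page(buffer);
		if (!page)
			return;

		/* full == 1: only swap out a page the writer has finished */
		ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 1);
		if (ret >= 0) {
			/* 'page' holds a buffer_data_page; data starts at 'ret' */
		}

		ring_buffer_free_read_page(buffer, page);
	}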
@@ -2845,14 +3086,11 @@ static const struct file_operations rb_simple_fops = { | |||
2845 | static __init int rb_init_debugfs(void) | 3086 | static __init int rb_init_debugfs(void) |
2846 | { | 3087 | { |
2847 | struct dentry *d_tracer; | 3088 | struct dentry *d_tracer; |
2848 | struct dentry *entry; | ||
2849 | 3089 | ||
2850 | d_tracer = tracing_init_dentry(); | 3090 | d_tracer = tracing_init_dentry(); |
2851 | 3091 | ||
2852 | entry = debugfs_create_file("tracing_on", 0644, d_tracer, | 3092 | trace_create_file("tracing_on", 0644, d_tracer, |
2853 | &ring_buffer_flags, &rb_simple_fops); | 3093 | &ring_buffer_flags, &rb_simple_fops); |
2854 | if (!entry) | ||
2855 | pr_warning("Could not create debugfs 'tracing_on' entry\n"); | ||
2856 | 3094 | ||
2857 | return 0; | 3095 | return 0; |
2858 | } | 3096 | } |
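[Editor's note] The debugfs cleanup works because trace_create_file() folds the NULL check and warning into the helper, so every call site sheds its three lines of boilerplate. A sketch of the helper assumed here; the real one is added to kernel/trace/trace.c elsewhere in this series:

	struct dentry *trace_create_file(const char *name, mode_t mode,
					 struct dentry *parent, void *data,
					 const struct file_operations *fops)
	{
		struct dentry *ret;

		ret = debugfs_create_file(name, mode, parent, data, fops);
		if (!ret)
			pr_warning("Could not create debugfs '%s' entry\n", name);

		return ret;
	}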