path: root/kernel/trace
author: Steven Rostedt <rostedt@goodmis.org>  2008-10-01 00:29:53 -0400
committer: Ingo Molnar <mingo@elte.hu>  2008-10-14 04:39:05 -0400
commit: d769041f865330034131525ee6a7f72eb4af2a24 (patch)
tree: 5c0d93063585c9a94d3c8e8105cc7ad27f4fe0e8 /kernel/trace
parent: 70255b5e3f1bd1a5af5b1e425ec2c4db7c735112 (diff)
ring_buffer: implement new locking
The old "lock always" scheme had issues with lockdep, and was not very efficient anyways. This patch does a new design to be partially lockless on writes. Writes will add new entries to the per cpu pages by simply disabling interrupts. When a write needs to go to another page than it will grab the lock. A new "read page" has been added so that the reader can pull out a page from the ring buffer to read without worrying about the writer writing over it. This allows us to not take the lock for all reads. The lock is now only taken when a read needs to go to a new page. This is far from lockless, and interrupts still need to be disabled, but it is a step towards a more lockless solution, and it also solves a lot of the issues that were noticed by the first conversion of ftrace to the ring buffers. Note: the ring_buffer_{un}lock API has been removed. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/ring_buffer.c	298
-rw-r--r--	kernel/trace/trace.c	113
2 files changed, 247 insertions(+), 164 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8e7392fd0db9..9631abf2ae29 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -161,8 +161,10 @@ struct ring_buffer_per_cpu {
 	struct list_head pages;
 	unsigned long head;	/* read from head */
 	unsigned long tail;	/* write to tail */
+	unsigned long reader;
 	struct buffer_page *head_page;
 	struct buffer_page *tail_page;
+	struct buffer_page *reader_page;
 	unsigned long overrun;
 	unsigned long entries;
 	u64 write_stamp;
@@ -260,6 +262,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long addr;
 	int ret;

 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -272,9 +275,16 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);

+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		goto fail_free_buffer;
+	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
-		goto fail_free_buffer;
+		goto fail_free_reader;

 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
@@ -283,6 +293,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)

 	return cpu_buffer;

+ fail_free_reader:
+	free_buffer_page(cpu_buffer->reader_page);
+
  fail_free_buffer:
 	kfree(cpu_buffer);
 	return NULL;
@@ -293,6 +306,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;

+	list_del_init(&cpu_buffer->reader_page->list);
+	free_buffer_page(cpu_buffer->reader_page);
+
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
 		free_buffer_page(page);
@@ -538,8 +554,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)

 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return cpu_buffer->head_page == cpu_buffer->tail_page &&
-		cpu_buffer->head == cpu_buffer->tail;
+	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
+		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
+		  cpu_buffer->head == cpu_buffer->tail)));
 }

 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -555,10 +573,10 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 }

 static inline struct ring_buffer_event *
-rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->head_page,
-			     cpu_buffer->head);
+	return rb_page_index(cpu_buffer->reader_page,
+			     cpu_buffer->reader);
 }

 static inline struct ring_buffer_event *
@@ -610,15 +628,32 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 	cpu_buffer->write_stamp = *ts;
 }

-static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
 	cpu_buffer->head = 0;
 }

-static void
-rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+	cpu_buffer->reader = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/*
+	 * The iterator could be on the reader page (it starts there).
+	 * But the head could have moved, since the reader was
+	 * found. Check for this case and assign the iterator
+	 * to the head page instead of next.
+	 */
+	if (iter->head_page == cpu_buffer->reader_page)
+		iter->head_page = cpu_buffer->head_page;
+	else
+		rb_inc_page(cpu_buffer, &iter->head_page);
+
 	iter->read_stamp = iter->head_page->time_stamp;
 	iter->head = 0;
 }
@@ -693,30 +728,39 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *head_page, *tail_page;
+	struct buffer_page *tail_page, *head_page, *reader_page;
 	unsigned long tail;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;

+	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	head_page = cpu_buffer->head_page;
 	tail = cpu_buffer->tail;

 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;

+		spin_lock(&cpu_buffer->lock);
 		rb_inc_page(cpu_buffer, &next_page);

+		head_page = cpu_buffer->head_page;
+		reader_page = cpu_buffer->reader_page;
+
+		/* we grabbed the lock before incrementing */
+		WARN_ON(next_page == reader_page);
+
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				spin_unlock(&cpu_buffer->lock);
 				return NULL;
+			}

 			/* count overflows */
 			rb_update_overflow(cpu_buffer);

 			rb_inc_page(cpu_buffer, &head_page);
 			cpu_buffer->head_page = head_page;
-			rb_reset_read_page(cpu_buffer);
+			rb_reset_head_page(cpu_buffer);
 		}

 		if (tail != BUF_PAGE_SIZE) {
@@ -732,6 +776,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page = tail_page;
 		cpu_buffer->tail = tail;
 		rb_add_stamp(cpu_buffer, ts);
+		spin_unlock(&cpu_buffer->lock);
 	}

 	BUG_ON(tail + length > BUF_PAGE_SIZE);
@@ -802,7 +847,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 			return NULL;
 		}
 	} else {
+		spin_lock(&cpu_buffer->lock);
 		rb_add_stamp(cpu_buffer, &ts);
+		spin_unlock(&cpu_buffer->lock);
 		delta = 0;
 	}

@@ -851,13 +898,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();

 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;

 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);

 	if (atomic_read(&cpu_buffer->record_disabled))
-		goto no_record;
+		goto out;

 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
@@ -865,13 +911,11 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,

 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
-		goto no_record;
+		goto out;

 	return event;

- no_record:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
+ out:
 	local_irq_restore(*flags);
 	return NULL;
 }
@@ -904,11 +948,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,

 	cpu_buffer = buffer->buffers[cpu];

-	assert_spin_locked(&cpu_buffer->lock);
-
 	rb_commit(cpu_buffer, event);

-	spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);

 	return 0;
@@ -945,10 +986,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();

 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;

 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);

 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -967,56 +1007,12 @@ int ring_buffer_write(struct ring_buffer *buffer,

 	ret = 0;
  out:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
 	local_irq_restore(flags);

 	return ret;
 }

 /**
- * ring_buffer_lock - lock the ring buffer
- * @buffer: The ring buffer to lock
- * @flags: The place to store the interrupt flags
- *
- * This locks all the per CPU buffers.
- *
- * Must be unlocked by ring_buffer_unlock.
- */
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	local_irq_save(*flags);
-
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		spin_lock(&cpu_buffer->lock);
-	}
-}
-
-/**
- * ring_buffer_unlock - unlock a locked buffer
- * @buffer: The locked buffer to unlock
- * @flags: The interrupt flags received by ring_buffer_lock
- */
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
-		if (!cpu_isset(cpu, buffer->cpumask))
-			continue;
-		cpu_buffer = buffer->buffers[cpu];
-		spin_unlock(&cpu_buffer->lock);
-	}
-
-	local_irq_restore(flags);
-}
-
-/**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
  *
@@ -1169,9 +1165,18 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;

-	iter->head_page = cpu_buffer->head_page;
-	iter->head = cpu_buffer->head;
-	rb_reset_iter_read_page(iter);
+	/* Iterator usage is expected to have record disabled */
+	if (list_empty(&cpu_buffer->reader_page->list)) {
+		iter->head_page = cpu_buffer->head_page;
+		iter->head = cpu_buffer->head;
+	} else {
+		iter->head_page = cpu_buffer->reader_page;
+		iter->head = cpu_buffer->reader;
+	}
+	if (iter->head)
+		iter->read_stamp = cpu_buffer->read_stamp;
+	else
+		iter->read_stamp = iter->head_page->time_stamp;
 }

 /**
@@ -1250,43 +1255,84 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 	return;
 }

-static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct ring_buffer_event *event;
-	unsigned length;
+	struct buffer_page *reader = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+	reader = cpu_buffer->reader_page;
+
+	/* If there's more to read, return this page */
+	if (cpu_buffer->reader < reader->size)
+		goto out;
+
+	/* Never should we have an index greater than the size */
+	WARN_ON(cpu_buffer->reader > reader->size);
+
+	/* check if we caught up to the tail */
+	reader = NULL;
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		goto out;

 	/*
-	 * Check if we are at the end of the buffer.
+	 * Splice the empty reader page into the list around the head.
+	 * Reset the reader page to size zero.
 	 */
-	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
-		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		return;
-	}

-	event = rb_head_event(cpu_buffer);
+	reader = cpu_buffer->head_page;
+	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.prev = reader->list.prev;
+	cpu_buffer->reader_page->size = 0;

-	if (event->type == RINGBUF_TYPE_DATA)
-		cpu_buffer->entries--;
-
-	length = rb_event_length(event);
+	/* Make the reader page now replace the head */
+	reader->list.prev->next = &cpu_buffer->reader_page->list;
+	reader->list.next->prev = &cpu_buffer->reader_page->list;

 	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
+	 * If the tail is on the reader, then we must set the head
+	 * to the inserted page, otherwise we set it one before.
 	 */
-	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
-	       (cpu_buffer->head + length > cpu_buffer->tail));
+	cpu_buffer->head_page = cpu_buffer->reader_page;

-	rb_update_read_stamp(cpu_buffer, event);
+	if (cpu_buffer->tail_page != reader)
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+	/* Finally update the reader page to the new head */
+	cpu_buffer->reader_page = reader;
+	rb_reset_reader_page(cpu_buffer);
+
+	goto again;
+
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+	unsigned length;
+
+	reader = rb_get_reader_page(cpu_buffer);

-	cpu_buffer->head += length;
+	/* This function should not be called when buffer is empty */
+	BUG_ON(!reader);

-	/* check for end of page */
-	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
-	    (cpu_buffer->head_page != cpu_buffer->tail_page))
-		rb_advance_head(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	length = rb_event_length(event);
+	cpu_buffer->reader += length;
 }

 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1304,8 +1350,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 */
 	if (iter->head >= iter->head_page->size) {
 		BUG_ON(iter->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		return;
 	}

@@ -1344,6 +1389,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	struct buffer_page *reader;

 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1351,25 +1397,26 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];

  again:
-	if (rb_per_cpu_empty(cpu_buffer))
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
 		return NULL;

-	event = rb_head_event(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);

 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		goto again;
+		WARN_ON(1);
+		rb_advance_reader(cpu_buffer);
+		return NULL;

 	case RINGBUF_TYPE_TIME_EXTEND:
 		/* Internal data, OK to advance */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;

 	case RINGBUF_TYPE_TIME_STAMP:
 		/* FIXME: not implemented */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;

 	case RINGBUF_TYPE_DATA:
@@ -1415,8 +1462,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)

 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		goto again;

 	case RINGBUF_TYPE_TIME_EXTEND:
@@ -1465,7 +1511,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		return NULL;

 	cpu_buffer = buffer->buffers[cpu];
-	rb_advance_head(cpu_buffer);
+	rb_advance_reader(cpu_buffer);

 	return event;
 }
@@ -1487,6 +1533,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
+	unsigned long flags;

 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1502,11 +1549,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();

-	spin_lock(&cpu_buffer->lock);
-	iter->head = cpu_buffer->head;
-	iter->head_page = cpu_buffer->head_page;
-	rb_reset_iter_read_page(iter);
-	spin_unlock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	ring_buffer_iter_reset(iter);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);

 	return iter;
 }
@@ -1562,10 +1607,14 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->head_page->size = 0;
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->tail_page->size = 0;
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
+	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;

-	cpu_buffer->head = cpu_buffer->tail = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 }
@@ -1583,13 +1632,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;

-	local_irq_save(flags);
-	spin_lock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);

 	rb_reset_cpu(cpu_buffer);

-	spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 }

 /**
@@ -1598,15 +1645,10 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
  */
 void ring_buffer_reset(struct ring_buffer *buffer)
 {
-	unsigned long flags;
 	int cpu;

-	ring_buffer_lock(buffer, &flags);
-
 	for_each_buffer_cpu(buffer, cpu)
-		rb_reset_cpu(buffer->buffers[cpu]);
-
-	ring_buffer_unlock(buffer, flags);
+		ring_buffer_reset_cpu(buffer, cpu);
 }

 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
 unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly tracing_thresh;

+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+	preempt_disable();
+	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+	preempt_enable();
+}
+
 static cpumask_t __read_mostly tracing_buffer_mask;

 #define for_each_tracing_cpu(cpu)	\
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;

+	ftrace_disable_cpu();
 	ring_buffer_reset(tr->buffer);
+	ftrace_enable_cpu();

 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);

+	ftrace_disable_cpu();
+
 	ring_buffer_reset(max_tr.buffer);
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);

+	ftrace_enable_cpu();
+
 	WARN_ON_ONCE(ret);

 	__update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)

 void tracing_reset(struct trace_array *tr, int cpu)
 {
+	ftrace_disable_cpu();
 	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ftrace_enable_cpu();
 }

 #define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	struct ftrace_entry *entry;
 	unsigned long irq_flags;

+	/* If we are reading the ring buffer, don't trace */
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
 	if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {

 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
 	iter->idx++;
-	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (iter->buffer_iter[iter->cpu])
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+	ftrace_enable_cpu();
 }

 static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];

-	event = ring_buffer_iter_peek(buf_iter, ts);
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
+	if (buf_iter)
+		event = ring_buffer_iter_peek(buf_iter, ts);
+	else
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+	ftrace_enable_cpu();
+
 	return event ? ring_buffer_event_data(event) : NULL;
 }
+
 static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)

 static void trace_consume(struct trace_iterator *iter)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ftrace_enable_cpu();
 }

 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->cpu = 0;
 		iter->idx = -1;

+		ftrace_disable_cpu();
+
 		for_each_tracing_cpu(cpu) {
 			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}

+		ftrace_enable_cpu();
+
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;

@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		cont = (struct trace_field_cont *)ent;
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+		ftrace_disable_cpu();
+
+		if (iter->buffer_iter[iter->cpu])
+			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		else
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+		ftrace_enable_cpu();
+
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);

@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
 	int cpu;

 	for_each_tracing_cpu(cpu) {
-		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
-			return 0;
+		if (iter->buffer_iter[cpu]) {
+			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+				return 0;
+		} else {
+			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+				return 0;
+		}
 	}
+
 	return TRACE_TYPE_HANDLED;
 }

@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->pos = -1;

 	for_each_tracing_cpu(cpu) {
+
 		iter->buffer_iter[cpu] =
 			ring_buffer_read_start(iter->tr->buffer, cpu);
+
 		if (!iter->buffer_iter[cpu])
 			goto fail_buffer;
 	}
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
-	int cpu;

 	if (tracing_disabled)
 		return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;

-	for_each_tracing_cpu(cpu) {
-		iter->buffer_iter[cpu] =
-			ring_buffer_read_start(iter->tr->buffer, cpu);
-		if (!iter->buffer_iter[cpu])
-			goto fail_buffer;
-	}
-
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);

 	return 0;
-
- fail_buffer:
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
-	mutex_unlock(&trace_types_lock);
-
-	return -ENOMEM;
 }

 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	int cpu;

-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);

@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
@@ -2528,7 +2571,6 @@ waitagain:
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	ring_buffer_lock(iter->tr->buffer, &flags);

 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
 			break;
 	}

-	ring_buffer_unlock(iter->tr->buffer, flags);
#ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
#endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
 	static struct trace_iterator iter;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags, irq_flags;
-	int cnt = 0;
+	unsigned long flags;
+	int cnt = 0, cpu;

 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
 	/* No turning back! */
 	ftrace_kill_atomic();

+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&global_trace.data[cpu]->disabled);
+	}
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");

 	iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)

 	cpus_clear(mask);

-	ring_buffer_lock(iter.tr->buffer, &irq_flags);
-
 	while (!trace_empty(&iter)) {

 		if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");

-	ring_buffer_unlock(iter.tr->buffer, irq_flags);
-
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }