Diffstat (limited to 'kernel')
-rw-r--r--	kernel/trace/ring_buffer.c	298
-rw-r--r--	kernel/trace/trace.c	113
2 files changed, 247 insertions(+), 164 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8e7392fd0db9..9631abf2ae29 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -161,8 +161,10 @@ struct ring_buffer_per_cpu {
 	struct list_head		pages;
 	unsigned long			head;	/* read from head */
 	unsigned long			tail;	/* write to tail */
+	unsigned long			reader;
 	struct buffer_page		*head_page;
 	struct buffer_page		*tail_page;
+	struct buffer_page		*reader_page;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -260,6 +262,7 @@ static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long addr;
 	int ret;
 
 	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -272,9 +275,16 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	spin_lock_init(&cpu_buffer->lock);
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		goto fail_free_buffer;
+	cpu_buffer->reader_page = (struct buffer_page *)virt_to_page(addr);
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
 	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
 	if (ret < 0)
-		goto fail_free_buffer;
+		goto fail_free_reader;
 
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
@@ -283,6 +293,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	return cpu_buffer;
 
+ fail_free_reader:
+	free_buffer_page(cpu_buffer->reader_page);
+
  fail_free_buffer:
 	kfree(cpu_buffer);
 	return NULL;
@@ -293,6 +306,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = &cpu_buffer->pages;
 	struct buffer_page *page, *tmp;
 
+	list_del_init(&cpu_buffer->reader_page->list);
+	free_buffer_page(cpu_buffer->reader_page);
+
 	list_for_each_entry_safe(page, tmp, head, list) {
 		list_del_init(&page->list);
 		free_buffer_page(page);
@@ -538,8 +554,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return cpu_buffer->head_page == cpu_buffer->tail_page &&
-		cpu_buffer->head == cpu_buffer->tail;
+	return (cpu_buffer->reader == cpu_buffer->reader_page->size &&
+		(cpu_buffer->tail_page == cpu_buffer->reader_page ||
+		 (cpu_buffer->tail_page == cpu_buffer->head_page &&
+		  cpu_buffer->head == cpu_buffer->tail)));
 }
 
 static inline int rb_null_event(struct ring_buffer_event *event)
@@ -555,10 +573,10 @@ static inline void *rb_page_index(struct buffer_page *page, unsigned index)
 }
 
 static inline struct ring_buffer_event *
-rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
+rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	return rb_page_index(cpu_buffer->head_page,
-			     cpu_buffer->head);
+	return rb_page_index(cpu_buffer->reader_page,
+			     cpu_buffer->reader);
 }
 
 static inline struct ring_buffer_event *
@@ -610,15 +628,32 @@ rb_add_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 	cpu_buffer->write_stamp = *ts;
 }
 
-static void rb_reset_read_page(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_reset_head_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->read_stamp = cpu_buffer->head_page->time_stamp;
 	cpu_buffer->head = 0;
 }
 
-static void
-rb_reset_iter_read_page(struct ring_buffer_iter *iter)
+static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
+	cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+	cpu_buffer->reader = 0;
+}
+
+static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+
+	/*
+	 * The iterator could be on the reader page (it starts there).
+	 * But the head could have moved, since the reader was
+	 * found. Check for this case and assign the iterator
+	 * to the head page instead of next.
+	 */
+	if (iter->head_page == cpu_buffer->reader_page)
+		iter->head_page = cpu_buffer->head_page;
+	else
+		rb_inc_page(cpu_buffer, &iter->head_page);
+
 	iter->read_stamp = iter->head_page->time_stamp;
 	iter->head = 0;
 }
@@ -693,30 +728,39 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *head_page, *tail_page;
+	struct buffer_page *tail_page, *head_page, *reader_page;
 	unsigned long tail;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 
+	/* No locking needed for tail page */
 	tail_page = cpu_buffer->tail_page;
-	head_page = cpu_buffer->head_page;
 	tail = cpu_buffer->tail;
 
 	if (tail + length > BUF_PAGE_SIZE) {
 		struct buffer_page *next_page = tail_page;
 
+		spin_lock(&cpu_buffer->lock);
 		rb_inc_page(cpu_buffer, &next_page);
 
+		head_page = cpu_buffer->head_page;
+		reader_page = cpu_buffer->reader_page;
+
+		/* we grabbed the lock before incrementing */
+		WARN_ON(next_page == reader_page);
+
 		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
+			if (!(buffer->flags & RB_FL_OVERWRITE)) {
+				spin_unlock(&cpu_buffer->lock);
 				return NULL;
+			}
 
 			/* count overflows */
 			rb_update_overflow(cpu_buffer);
 
 			rb_inc_page(cpu_buffer, &head_page);
 			cpu_buffer->head_page = head_page;
-			rb_reset_read_page(cpu_buffer);
+			rb_reset_head_page(cpu_buffer);
 		}
 
 		if (tail != BUF_PAGE_SIZE) {
@@ -732,6 +776,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page = tail_page;
 		cpu_buffer->tail = tail;
 		rb_add_stamp(cpu_buffer, ts);
+		spin_unlock(&cpu_buffer->lock);
 	}
 
 	BUG_ON(tail + length > BUF_PAGE_SIZE);
@@ -802,7 +847,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 			return NULL;
 		}
 	} else {
+		spin_lock(&cpu_buffer->lock);
 		rb_add_stamp(cpu_buffer, &ts);
+		spin_unlock(&cpu_buffer->lock);
 		delta = 0;
 	}
 
@@ -851,13 +898,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
-		goto no_record;
+		goto out;
 
 	length = rb_calculate_event_length(length);
 	if (length > BUF_PAGE_SIZE)
@@ -865,13 +911,11 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 
 	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
 	if (!event)
-		goto no_record;
+		goto out;
 
 	return event;
 
- no_record:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
+ out:
 	local_irq_restore(*flags);
 	return NULL;
 }
@@ -904,11 +948,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	cpu_buffer = buffer->buffers[cpu];
 
-	assert_spin_locked(&cpu_buffer->lock);
-
 	rb_commit(cpu_buffer, event);
 
-	spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
 	return 0;
@@ -945,10 +986,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	cpu = raw_smp_processor_id();
 
 	if (!cpu_isset(cpu, buffer->cpumask))
-		goto out_irq;
+		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock(&cpu_buffer->lock);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -967,56 +1007,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	spin_unlock(&cpu_buffer->lock);
- out_irq:
 	local_irq_restore(flags);
 
 	return ret;
 }
 
 /**
- * ring_buffer_lock - lock the ring buffer
- * @buffer: The ring buffer to lock
- * @flags: The place to store the interrupt flags
- *
- * This locks all the per CPU buffers.
- *
- * Must be unlocked by ring_buffer_unlock.
- */
-void ring_buffer_lock(struct ring_buffer *buffer, unsigned long *flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	local_irq_save(*flags);
-
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		spin_lock(&cpu_buffer->lock);
-	}
-}
-
-/**
- * ring_buffer_unlock - unlock a locked buffer
- * @buffer: The locked buffer to unlock
- * @flags: The interrupt flags received by ring_buffer_lock
- */
-void ring_buffer_unlock(struct ring_buffer *buffer, unsigned long flags)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-	int cpu;
-
-	for (cpu = buffer->cpus - 1; cpu >= 0; cpu--) {
-		if (!cpu_isset(cpu, buffer->cpumask))
-			continue;
-		cpu_buffer = buffer->buffers[cpu];
-		spin_unlock(&cpu_buffer->lock);
-	}
-
-	local_irq_restore(flags);
-}
-
-/**
  * ring_buffer_record_disable - stop all writes into the buffer
  * @buffer: The ring buffer to stop writes to.
  *
@@ -1169,9 +1165,18 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
-	iter->head_page = cpu_buffer->head_page;
-	iter->head = cpu_buffer->head;
-	rb_reset_iter_read_page(iter);
+	/* Iterator usage is expected to have record disabled */
+	if (list_empty(&cpu_buffer->reader_page->list)) {
+		iter->head_page = cpu_buffer->head_page;
+		iter->head = cpu_buffer->head;
+	} else {
+		iter->head_page = cpu_buffer->reader_page;
+		iter->head = cpu_buffer->reader;
+	}
+	if (iter->head)
+		iter->read_stamp = cpu_buffer->read_stamp;
+	else
+		iter->read_stamp = iter->head_page->time_stamp;
 }
 
 /**
@@ -1250,43 +1255,84 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 		return;
 }
 
-static void rb_advance_head(struct ring_buffer_per_cpu *cpu_buffer)
+static struct buffer_page *
+rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct ring_buffer_event *event;
-	unsigned length;
+	struct buffer_page *reader = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+
+ again:
+	reader = cpu_buffer->reader_page;
+
+	/* If there's more to read, return this page */
+	if (cpu_buffer->reader < reader->size)
+		goto out;
+
+	/* Never should we have an index greater than the size */
+	WARN_ON(cpu_buffer->reader > reader->size);
+
+	/* check if we caught up to the tail */
+	reader = NULL;
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		goto out;
 
 	/*
-	 * Check if we are at the end of the buffer.
+	 * Splice the empty reader page into the list around the head.
+	 * Reset the reader page to size zero.
 	 */
-	if (cpu_buffer->head >= cpu_buffer->head_page->size) {
-		BUG_ON(cpu_buffer->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		return;
-	}
 
-	event = rb_head_event(cpu_buffer);
+	reader = cpu_buffer->head_page;
+	cpu_buffer->reader_page->list.next = reader->list.next;
+	cpu_buffer->reader_page->list.prev = reader->list.prev;
+	cpu_buffer->reader_page->size = 0;
 
-	if (event->type == RINGBUF_TYPE_DATA)
-		cpu_buffer->entries--;
-
-	length = rb_event_length(event);
+	/* Make the reader page now replace the head */
+	reader->list.prev->next = &cpu_buffer->reader_page->list;
+	reader->list.next->prev = &cpu_buffer->reader_page->list;
 
 	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
+	 * If the tail is on the reader, then we must set the head
+	 * to the inserted page, otherwise we set it one before.
 	 */
-	BUG_ON((cpu_buffer->head_page == cpu_buffer->tail_page) &&
-	       (cpu_buffer->head + length > cpu_buffer->tail));
+	cpu_buffer->head_page = cpu_buffer->reader_page;
 
-	rb_update_read_stamp(cpu_buffer, event);
+	if (cpu_buffer->tail_page != reader)
+		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+
+	/* Finally update the reader page to the new head */
+	cpu_buffer->reader_page = reader;
+	rb_reset_reader_page(cpu_buffer);
+
+	goto again;
+
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+
+	return reader;
+}
+
+static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct ring_buffer_event *event;
+	struct buffer_page *reader;
+	unsigned length;
+
+	reader = rb_get_reader_page(cpu_buffer);
 
-	cpu_buffer->head += length;
+	/* This function should not be called when buffer is empty */
+	BUG_ON(!reader);
 
-	/* check for end of page */
-	if ((cpu_buffer->head >= cpu_buffer->head_page->size) &&
-	    (cpu_buffer->head_page != cpu_buffer->tail_page))
-		rb_advance_head(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
+
+	if (event->type == RINGBUF_TYPE_DATA)
+		cpu_buffer->entries--;
+
+	rb_update_read_stamp(cpu_buffer, event);
+
+	length = rb_event_length(event);
+	cpu_buffer->reader += length;
 }
 
 static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -1304,8 +1350,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 */
 	if (iter->head >= iter->head_page->size) {
 		BUG_ON(iter->head_page == cpu_buffer->tail_page);
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		return;
 	}
 
@@ -1344,6 +1389,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	struct buffer_page *reader;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1351,25 +1397,26 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
-	if (rb_per_cpu_empty(cpu_buffer))
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
 		return NULL;
 
-	event = rb_head_event(cpu_buffer);
+	event = rb_reader_event(cpu_buffer);
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
-		rb_reset_read_page(cpu_buffer);
-		goto again;
+		WARN_ON(1);
+		rb_advance_reader(cpu_buffer);
+		return NULL;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		/* Internal data, OK to advance */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
 		/* FIXME: not implemented */
-		rb_advance_head(cpu_buffer);
+		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
@@ -1415,8 +1462,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
 	switch (event->type) {
 	case RINGBUF_TYPE_PADDING:
-		rb_inc_page(cpu_buffer, &iter->head_page);
-		rb_reset_iter_read_page(iter);
+		rb_inc_iter(iter);
 		goto again;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
@@ -1465,7 +1511,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		return NULL;
 
 	cpu_buffer = buffer->buffers[cpu];
-	rb_advance_head(cpu_buffer);
+	rb_advance_reader(cpu_buffer);
 
 	return event;
 }
@@ -1487,6 +1533,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
+	unsigned long flags;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1502,11 +1549,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	atomic_inc(&cpu_buffer->record_disabled);
 	synchronize_sched();
 
-	spin_lock(&cpu_buffer->lock);
-	iter->head = cpu_buffer->head;
-	iter->head_page = cpu_buffer->head_page;
-	rb_reset_iter_read_page(iter);
-	spin_unlock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
+	ring_buffer_iter_reset(iter);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 
 	return iter;
 }
@@ -1562,10 +1607,14 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
-	cpu_buffer->tail_page
-		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->head_page->size = 0;
+	cpu_buffer->tail_page = cpu_buffer->head_page;
+	cpu_buffer->tail_page->size = 0;
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	cpu_buffer->reader_page->size = 0;
+
+	cpu_buffer->head = cpu_buffer->tail = cpu_buffer->reader = 0;
 
-	cpu_buffer->head = cpu_buffer->tail = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 }
@@ -1583,13 +1632,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return;
 
-	local_irq_save(flags);
-	spin_lock(&cpu_buffer->lock);
+	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
 	rb_reset_cpu(cpu_buffer);
 
-	spin_unlock(&cpu_buffer->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&cpu_buffer->lock, flags);
 }
 
 /**
@@ -1598,15 +1645,10 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
  */
 void ring_buffer_reset(struct ring_buffer *buffer)
 {
-	unsigned long flags;
 	int cpu;
 
-	ring_buffer_lock(buffer, &flags);
-
 	for_each_buffer_cpu(buffer, cpu)
-		rb_reset_cpu(buffer->buffers[cpu]);
-
-	ring_buffer_unlock(buffer, flags);
+		ring_buffer_reset_cpu(buffer, cpu);
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+	preempt_disable();
+	local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+	local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+	preempt_enable();
+}
+
 static cpumask_t __read_mostly		tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu)	\
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tr->buffer = max_tr.buffer;
 	max_tr.buffer = buf;
 
+	ftrace_disable_cpu();
 	ring_buffer_reset(tr->buffer);
+	ftrace_enable_cpu();
 
 	__update_max_tr(tr, tsk, cpu);
 	__raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	WARN_ON_ONCE(!irqs_disabled());
 	__raw_spin_lock(&ftrace_max_lock);
 
+	ftrace_disable_cpu();
+
 	ring_buffer_reset(max_tr.buffer);
 	ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
+	ftrace_enable_cpu();
+
 	WARN_ON_ONCE(ret);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)
 
 void tracing_reset(struct trace_array *tr, int cpu)
 {
+	ftrace_disable_cpu();
 	ring_buffer_reset_cpu(tr->buffer, cpu);
+	ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	struct ftrace_entry *entry;
 	unsigned long irq_flags;
 
+	/* If we are reading the ring buffer, don't trace */
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
 	if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {
 
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
 	iter->idx++;
-	ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (iter->buffer_iter[iter->cpu])
+		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+	ftrace_enable_cpu();
 }
 
 static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-	event = ring_buffer_iter_peek(buf_iter, ts);
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
+
+	if (buf_iter)
+		event = ring_buffer_iter_peek(buf_iter, ts);
+	else
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+	ftrace_enable_cpu();
+
 	return event ? ring_buffer_event_data(event) : NULL;
 }
+
 static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 
 static void trace_consume(struct trace_iterator *iter)
 {
+	/* Don't allow ftrace to trace into the ring buffers */
+	ftrace_disable_cpu();
 	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->cpu = 0;
 		iter->idx = -1;
 
+		ftrace_disable_cpu();
+
 		for_each_tracing_cpu(cpu) {
 			ring_buffer_iter_reset(iter->buffer_iter[cpu]);
 		}
 
+		ftrace_enable_cpu();
+
 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
 			;
 
@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
 		cont = (struct trace_field_cont *)ent;
 		if (ok)
 			ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+		ftrace_disable_cpu();
+
+		if (iter->buffer_iter[iter->cpu])
+			ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+		else
+			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+		ftrace_enable_cpu();
+
 		ent = peek_next_entry(iter, iter->cpu, NULL);
 	} while (ent && ent->type == TRACE_CONT);
 
@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
 	int cpu;
 
 	for_each_tracing_cpu(cpu) {
-		if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
-			return 0;
+		if (iter->buffer_iter[cpu]) {
+			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+				return 0;
+		} else {
+			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+				return 0;
+		}
 	}
+
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 	iter->pos = -1;
 
 	for_each_tracing_cpu(cpu) {
+
 		iter->buffer_iter[cpu] =
 			ring_buffer_read_start(iter->tr->buffer, cpu);
+
 		if (!iter->buffer_iter[cpu])
 			goto fail_buffer;
 	}
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_iterator *iter;
-	int cpu;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter->trace = current_trace;
 	filp->private_data = iter;
 
-	for_each_tracing_cpu(cpu) {
-		iter->buffer_iter[cpu] =
-			ring_buffer_read_start(iter->tr->buffer, cpu);
-		if (!iter->buffer_iter[cpu])
-			goto fail_buffer;
-	}
-
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
-
- fail_buffer:
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
-	mutex_unlock(&trace_types_lock);
-
-	return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	int cpu;
 
-	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu])
-			ring_buffer_read_finish(iter->buffer_iter[cpu]);
-	}
 	kfree(iter);
 	atomic_dec(&tracing_reader);
 
@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	unsigned long flags;
 #ifdef CONFIG_FTRACE
 	int ftrace_save;
 #endif
@@ -2528,7 +2571,6 @@ waitagain:
 	ftrace_enabled = 0;
 #endif
 	smp_wmb();
-	ring_buffer_lock(iter->tr->buffer, &flags);
 
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
 			break;
 	}
 
-	ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
 	ftrace_enabled = ftrace_save;
 #endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
 	static struct trace_iterator iter;
 	static cpumask_t mask;
 	static int dump_ran;
-	unsigned long flags, irq_flags;
-	int cnt = 0;
+	unsigned long flags;
+	int cnt = 0, cpu;
 
 	/* only one dump */
 	spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
 	/* No turning back! */
 	ftrace_kill_atomic();
 
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&global_trace.data[cpu]->disabled);
+	}
+
 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)
 
 	cpus_clear(mask);
 
-	ring_buffer_lock(iter.tr->buffer, &irq_flags);
-
 	while (!trace_empty(&iter)) {
 
 		if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
-	ring_buffer_unlock(iter.tr->buffer, irq_flags);
-
  out:
 	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }