diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 311 |
1 files changed, 193 insertions, 118 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dc4dc70171ce..04dac2638258 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
207 | #define RB_ALIGNMENT 4U | 207 | #define RB_ALIGNMENT 4U |
208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | ||
209 | 210 | ||
210 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 211 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
211 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 212 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu { | |||
415 | unsigned long overrun; | 416 | unsigned long overrun; |
416 | unsigned long read; | 417 | unsigned long read; |
417 | local_t entries; | 418 | local_t entries; |
419 | local_t committing; | ||
420 | local_t commits; | ||
418 | u64 write_stamp; | 421 | u64 write_stamp; |
419 | u64 read_stamp; | 422 | u64 read_stamp; |
420 | atomic_t record_disabled; | 423 | atomic_t record_disabled; |
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
618 | kfree(cpu_buffer); | 621 | kfree(cpu_buffer); |
619 | } | 622 | } |
620 | 623 | ||
621 | /* | ||
622 | * Causes compile errors if the struct buffer_page gets bigger | ||
623 | * than the struct page. | ||
624 | */ | ||
625 | extern int ring_buffer_page_too_big(void); | ||
626 | |||
627 | #ifdef CONFIG_HOTPLUG_CPU | 624 | #ifdef CONFIG_HOTPLUG_CPU |
628 | static int rb_cpu_notify(struct notifier_block *self, | 625 | static int rb_cpu_notify(struct notifier_block *self, |
629 | unsigned long action, void *hcpu); | 626 | unsigned long action, void *hcpu); |
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
646 | int bsize; | 643 | int bsize; |
647 | int cpu; | 644 | int cpu; |
648 | 645 | ||
649 | /* Paranoid! Optimizes out when all is well */ | ||
650 | if (sizeof(struct buffer_page) > sizeof(struct page)) | ||
651 | ring_buffer_page_too_big(); | ||
652 | |||
653 | |||
654 | /* keep it in its own cache line */ | 646 | /* keep it in its own cache line */ |
655 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 647 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
656 | GFP_KERNEL); | 648 | GFP_KERNEL); |
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
666 | buffer->reader_lock_key = key; | 658 | buffer->reader_lock_key = key; |
667 | 659 | ||
668 | /* need at least two pages */ | 660 | /* need at least two pages */ |
669 | if (buffer->pages == 1) | 661 | if (buffer->pages < 2) |
670 | buffer->pages++; | 662 | buffer->pages = 2; |
671 | 663 | ||
672 | /* | 664 | /* |
673 | * In case of non-hotplug cpu, if the ring-buffer is allocated | 665 | * In case of non-hotplug cpu, if the ring-buffer is allocated |
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event) | |||
1011 | { | 1003 | { |
1012 | unsigned long addr = (unsigned long)event; | 1004 | unsigned long addr = (unsigned long)event; |
1013 | 1005 | ||
1014 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 1006 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1015 | } | 1007 | } |
1016 | 1008 | ||
1017 | static inline int | 1009 | static inline int |
1018 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1010 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1019 | struct ring_buffer_event *event) | 1011 | struct ring_buffer_event *event) |
1020 | { | 1012 | { |
1021 | unsigned long addr = (unsigned long)event; | 1013 | unsigned long addr = (unsigned long)event; |
1022 | unsigned long index; | 1014 | unsigned long index; |
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1029 | } | 1021 | } |
1030 | 1022 | ||
1031 | static void | 1023 | static void |
1032 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | ||
1033 | struct ring_buffer_event *event) | ||
1034 | { | ||
1035 | unsigned long addr = (unsigned long)event; | ||
1036 | unsigned long index; | ||
1037 | |||
1038 | index = rb_event_index(event); | ||
1039 | addr &= PAGE_MASK; | ||
1040 | |||
1041 | while (cpu_buffer->commit_page->page != (void *)addr) { | ||
1042 | if (RB_WARN_ON(cpu_buffer, | ||
1043 | cpu_buffer->commit_page == cpu_buffer->tail_page)) | ||
1044 | return; | ||
1045 | cpu_buffer->commit_page->page->commit = | ||
1046 | cpu_buffer->commit_page->write; | ||
1047 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | ||
1048 | cpu_buffer->write_stamp = | ||
1049 | cpu_buffer->commit_page->page->time_stamp; | ||
1050 | } | ||
1051 | |||
1052 | /* Now set the commit to the event's index */ | ||
1053 | local_set(&cpu_buffer->commit_page->page->commit, index); | ||
1054 | } | ||
1055 | |||
1056 | static void | ||
1057 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1024 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1058 | { | 1025 | { |
1059 | /* | 1026 | /* |
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1171 | return length; | 1138 | return length; |
1172 | } | 1139 | } |
1173 | 1140 | ||
1141 | static inline void | ||
1142 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | ||
1143 | struct buffer_page *tail_page, | ||
1144 | unsigned long tail, unsigned long length) | ||
1145 | { | ||
1146 | struct ring_buffer_event *event; | ||
1147 | |||
1148 | /* | ||
1149 | * Only the event that crossed the page boundary | ||
1150 | * must fill the old tail_page with padding. | ||
1151 | */ | ||
1152 | if (tail >= BUF_PAGE_SIZE) { | ||
1153 | local_sub(length, &tail_page->write); | ||
1154 | return; | ||
1155 | } | ||
1156 | |||
1157 | event = __rb_page_index(tail_page, tail); | ||
1158 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1159 | |||
1160 | /* | ||
1161 | * If this event is bigger than the minimum size, then | ||
1162 | * we need to be careful that we don't subtract the | ||
1163 | * write counter enough to allow another writer to slip | ||
1164 | * in on this page. | ||
1165 | * We put in a discarded commit instead, to make sure | ||
1166 | * that this space is not used again. | ||
1167 | * | ||
1168 | * If we are less than the minimum size, we don't need to | ||
1169 | * worry about it. | ||
1170 | */ | ||
1171 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { | ||
1172 | /* No room for any events */ | ||
1173 | |||
1174 | /* Mark the rest of the page with padding */ | ||
1175 | rb_event_set_padding(event); | ||
1176 | |||
1177 | /* Set the write back to the previous setting */ | ||
1178 | local_sub(length, &tail_page->write); | ||
1179 | return; | ||
1180 | } | ||
1181 | |||
1182 | /* Put in a discarded event */ | ||
1183 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; | ||
1184 | event->type_len = RINGBUF_TYPE_PADDING; | ||
1185 | /* time delta must be non zero */ | ||
1186 | event->time_delta = 1; | ||
1187 | /* Account for this as an entry */ | ||
1188 | local_inc(&tail_page->entries); | ||
1189 | local_inc(&cpu_buffer->entries); | ||
1190 | |||
1191 | /* Set write to end of buffer */ | ||
1192 | length = (tail + length) - BUF_PAGE_SIZE; | ||
1193 | local_sub(length, &tail_page->write); | ||
1194 | } | ||
1174 | 1195 | ||
1175 | static struct ring_buffer_event * | 1196 | static struct ring_buffer_event * |
1176 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1197 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1180 | { | 1201 | { |
1181 | struct buffer_page *next_page, *head_page, *reader_page; | 1202 | struct buffer_page *next_page, *head_page, *reader_page; |
1182 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1203 | struct ring_buffer *buffer = cpu_buffer->buffer; |
1183 | struct ring_buffer_event *event; | ||
1184 | bool lock_taken = false; | 1204 | bool lock_taken = false; |
1185 | unsigned long flags; | 1205 | unsigned long flags; |
1186 | 1206 | ||
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1265 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1285 | cpu_buffer->tail_page->page->time_stamp = *ts; |
1266 | } | 1286 | } |
1267 | 1287 | ||
1268 | /* | 1288 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1269 | * The actual tail page has moved forward. | ||
1270 | */ | ||
1271 | if (tail < BUF_PAGE_SIZE) { | ||
1272 | /* Mark the rest of the page with padding */ | ||
1273 | event = __rb_page_index(tail_page, tail); | ||
1274 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1275 | rb_event_set_padding(event); | ||
1276 | } | ||
1277 | |||
1278 | /* Set the write back to the previous setting */ | ||
1279 | local_sub(length, &tail_page->write); | ||
1280 | |||
1281 | /* | ||
1282 | * If this was a commit entry that failed, | ||
1283 | * increment that too | ||
1284 | */ | ||
1285 | if (tail_page == cpu_buffer->commit_page && | ||
1286 | tail == rb_commit_index(cpu_buffer)) { | ||
1287 | rb_set_commit_to_write(cpu_buffer); | ||
1288 | } | ||
1289 | 1289 | ||
1290 | __raw_spin_unlock(&cpu_buffer->lock); | 1290 | __raw_spin_unlock(&cpu_buffer->lock); |
1291 | local_irq_restore(flags); | 1291 | local_irq_restore(flags); |
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1295 | 1295 | ||
1296 | out_reset: | 1296 | out_reset: |
1297 | /* reset write */ | 1297 | /* reset write */ |
1298 | local_sub(length, &tail_page->write); | 1298 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1299 | 1299 | ||
1300 | if (likely(lock_taken)) | 1300 | if (likely(lock_taken)) |
1301 | __raw_spin_unlock(&cpu_buffer->lock); | 1301 | __raw_spin_unlock(&cpu_buffer->lock); |
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1325 | 1325 | ||
1326 | /* We reserved something on the buffer */ | 1326 | /* We reserved something on the buffer */ |
1327 | 1327 | ||
1328 | if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) | ||
1329 | return NULL; | ||
1330 | |||
1331 | event = __rb_page_index(tail_page, tail); | 1328 | event = __rb_page_index(tail_page, tail); |
1332 | kmemcheck_annotate_bitfield(event, bitfield); | 1329 | kmemcheck_annotate_bitfield(event, bitfield); |
1333 | rb_update_event(event, type, length); | 1330 | rb_update_event(event, type, length); |
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1337 | local_inc(&tail_page->entries); | 1334 | local_inc(&tail_page->entries); |
1338 | 1335 | ||
1339 | /* | 1336 | /* |
1340 | * If this is a commit and the tail is zero, then update | 1337 | * If this is the first commit on the page, then update |
1341 | * this page's time stamp. | 1338 | * its timestamp. |
1342 | */ | 1339 | */ |
1343 | if (!tail && rb_is_commit(cpu_buffer, event)) | 1340 | if (!tail) |
1344 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1341 | tail_page->page->time_stamp = *ts; |
1345 | 1342 | ||
1346 | return event; | 1343 | return event; |
1347 | } | 1344 | } |
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1410 | return -EAGAIN; | 1407 | return -EAGAIN; |
1411 | 1408 | ||
1412 | /* Only a commited time event can update the write stamp */ | 1409 | /* Only a commited time event can update the write stamp */ |
1413 | if (rb_is_commit(cpu_buffer, event)) { | 1410 | if (rb_event_is_commit(cpu_buffer, event)) { |
1414 | /* | 1411 | /* |
1415 | * If this is the first on the page, then we need to | 1412 | * If this is the first on the page, then it was |
1416 | * update the page itself, and just put in a zero. | 1413 | * updated with the page itself. Try to discard it |
1414 | * and if we can't just make it zero. | ||
1417 | */ | 1415 | */ |
1418 | if (rb_event_index(event)) { | 1416 | if (rb_event_index(event)) { |
1419 | event->time_delta = *delta & TS_MASK; | 1417 | event->time_delta = *delta & TS_MASK; |
1420 | event->array[0] = *delta >> TS_SHIFT; | 1418 | event->array[0] = *delta >> TS_SHIFT; |
1421 | } else { | 1419 | } else { |
1422 | cpu_buffer->commit_page->page->time_stamp = *ts; | ||
1423 | /* try to discard, since we do not need this */ | 1420 | /* try to discard, since we do not need this */ |
1424 | if (!rb_try_to_discard(cpu_buffer, event)) { | 1421 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1425 | /* nope, just zero it */ | 1422 | /* nope, just zero it */ |
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1445 | return ret; | 1442 | return ret; |
1446 | } | 1443 | } |
1447 | 1444 | ||
1445 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1446 | { | ||
1447 | local_inc(&cpu_buffer->committing); | ||
1448 | local_inc(&cpu_buffer->commits); | ||
1449 | } | ||
1450 | |||
1451 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1452 | { | ||
1453 | unsigned long commits; | ||
1454 | |||
1455 | if (RB_WARN_ON(cpu_buffer, | ||
1456 | !local_read(&cpu_buffer->committing))) | ||
1457 | return; | ||
1458 | |||
1459 | again: | ||
1460 | commits = local_read(&cpu_buffer->commits); | ||
1461 | /* synchronize with interrupts */ | ||
1462 | barrier(); | ||
1463 | if (local_read(&cpu_buffer->committing) == 1) | ||
1464 | rb_set_commit_to_write(cpu_buffer); | ||
1465 | |||
1466 | local_dec(&cpu_buffer->committing); | ||
1467 | |||
1468 | /* synchronize with interrupts */ | ||
1469 | barrier(); | ||
1470 | |||
1471 | /* | ||
1472 | * Need to account for interrupts coming in between the | ||
1473 | * updating of the commit page and the clearing of the | ||
1474 | * committing counter. | ||
1475 | */ | ||
1476 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && | ||
1477 | !local_read(&cpu_buffer->committing)) { | ||
1478 | local_inc(&cpu_buffer->committing); | ||
1479 | goto again; | ||
1480 | } | ||
1481 | } | ||
1482 | |||
1448 | static struct ring_buffer_event * | 1483 | static struct ring_buffer_event * |
1449 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 1484 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, |
1450 | unsigned long length) | 1485 | unsigned long length) |
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1454 | int commit = 0; | 1489 | int commit = 0; |
1455 | int nr_loops = 0; | 1490 | int nr_loops = 0; |
1456 | 1491 | ||
1492 | rb_start_commit(cpu_buffer); | ||
1493 | |||
1457 | length = rb_calculate_event_length(length); | 1494 | length = rb_calculate_event_length(length); |
1458 | again: | 1495 | again: |
1459 | /* | 1496 | /* |
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1466 | * Bail! | 1503 | * Bail! |
1467 | */ | 1504 | */ |
1468 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1505 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1469 | return NULL; | 1506 | goto out_fail; |
1470 | 1507 | ||
1471 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 1508 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
1472 | 1509 | ||
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1497 | 1534 | ||
1498 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 1535 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
1499 | if (commit == -EBUSY) | 1536 | if (commit == -EBUSY) |
1500 | return NULL; | 1537 | goto out_fail; |
1501 | 1538 | ||
1502 | if (commit == -EAGAIN) | 1539 | if (commit == -EAGAIN) |
1503 | goto again; | 1540 | goto again; |
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1511 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 1548 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
1512 | goto again; | 1549 | goto again; |
1513 | 1550 | ||
1514 | if (!event) { | 1551 | if (!event) |
1515 | if (unlikely(commit)) | 1552 | goto out_fail; |
1516 | /* | ||
1517 | * Ouch! We needed a timestamp and it was commited. But | ||
1518 | * we didn't get our event reserved. | ||
1519 | */ | ||
1520 | rb_set_commit_to_write(cpu_buffer); | ||
1521 | return NULL; | ||
1522 | } | ||
1523 | 1553 | ||
1524 | /* | 1554 | if (!rb_event_is_commit(cpu_buffer, event)) |
1525 | * If the timestamp was commited, make the commit our entry | ||
1526 | * now so that we will update it when needed. | ||
1527 | */ | ||
1528 | if (unlikely(commit)) | ||
1529 | rb_set_commit_event(cpu_buffer, event); | ||
1530 | else if (!rb_is_commit(cpu_buffer, event)) | ||
1531 | delta = 0; | 1555 | delta = 0; |
1532 | 1556 | ||
1533 | event->time_delta = delta; | 1557 | event->time_delta = delta; |
1534 | 1558 | ||
1535 | return event; | 1559 | return event; |
1560 | |||
1561 | out_fail: | ||
1562 | rb_end_commit(cpu_buffer); | ||
1563 | return NULL; | ||
1536 | } | 1564 | } |
1537 | 1565 | ||
1538 | #define TRACE_RECURSIVE_DEPTH 16 | 1566 | #define TRACE_RECURSIVE_DEPTH 16 |
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1642 | { | 1670 | { |
1643 | local_inc(&cpu_buffer->entries); | 1671 | local_inc(&cpu_buffer->entries); |
1644 | 1672 | ||
1645 | /* Only process further if we own the commit */ | 1673 | /* |
1646 | if (!rb_is_commit(cpu_buffer, event)) | 1674 | * The event first in the commit queue updates the |
1647 | return; | 1675 | * time stamp. |
1648 | 1676 | */ | |
1649 | cpu_buffer->write_stamp += event->time_delta; | 1677 | if (rb_event_is_commit(cpu_buffer, event)) |
1678 | cpu_buffer->write_stamp += event->time_delta; | ||
1650 | 1679 | ||
1651 | rb_set_commit_to_write(cpu_buffer); | 1680 | rb_end_commit(cpu_buffer); |
1652 | } | 1681 | } |
1653 | 1682 | ||
1654 | /** | 1683 | /** |
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1737 | /* The event is discarded regardless */ | 1766 | /* The event is discarded regardless */ |
1738 | rb_event_discard(event); | 1767 | rb_event_discard(event); |
1739 | 1768 | ||
1769 | cpu = smp_processor_id(); | ||
1770 | cpu_buffer = buffer->buffers[cpu]; | ||
1771 | |||
1740 | /* | 1772 | /* |
1741 | * This must only be called if the event has not been | 1773 | * This must only be called if the event has not been |
1742 | * committed yet. Thus we can assume that preemption | 1774 | * committed yet. Thus we can assume that preemption |
1743 | * is still disabled. | 1775 | * is still disabled. |
1744 | */ | 1776 | */ |
1745 | RB_WARN_ON(buffer, preemptible()); | 1777 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
1746 | |||
1747 | cpu = smp_processor_id(); | ||
1748 | cpu_buffer = buffer->buffers[cpu]; | ||
1749 | 1778 | ||
1750 | if (!rb_try_to_discard(cpu_buffer, event)) | 1779 | if (!rb_try_to_discard(cpu_buffer, event)) |
1751 | goto out; | 1780 | goto out; |
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1756 | */ | 1785 | */ |
1757 | local_inc(&cpu_buffer->entries); | 1786 | local_inc(&cpu_buffer->entries); |
1758 | out: | 1787 | out: |
1759 | /* | 1788 | rb_end_commit(cpu_buffer); |
1760 | * If a write came in and pushed the tail page | ||
1761 | * we still need to update the commit pointer | ||
1762 | * if we were the commit. | ||
1763 | */ | ||
1764 | if (rb_is_commit(cpu_buffer, event)) | ||
1765 | rb_set_commit_to_write(cpu_buffer); | ||
1766 | 1789 | ||
1767 | trace_recursive_unlock(); | 1790 | trace_recursive_unlock(); |
1768 | 1791 | ||
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2446 | } | 2469 | } |
2447 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); | 2470 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); |
2448 | 2471 | ||
2472 | static inline int rb_ok_to_lock(void) | ||
2473 | { | ||
2474 | /* | ||
2475 | * If an NMI die dumps out the content of the ring buffer | ||
2476 | * do not grab locks. We also permanently disable the ring | ||
2477 | * buffer too. A one time deal is all you get from reading | ||
2478 | * the ring buffer from an NMI. | ||
2479 | */ | ||
2480 | if (likely(!in_nmi() && !oops_in_progress)) | ||
2481 | return 1; | ||
2482 | |||
2483 | tracing_off_permanent(); | ||
2484 | return 0; | ||
2485 | } | ||
2486 | |||
2449 | /** | 2487 | /** |
2450 | * ring_buffer_peek - peek at the next event to be read | 2488 | * ring_buffer_peek - peek at the next event to be read |
2451 | * @buffer: The ring buffer to read | 2489 | * @buffer: The ring buffer to read |
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2461 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2499 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2462 | struct ring_buffer_event *event; | 2500 | struct ring_buffer_event *event; |
2463 | unsigned long flags; | 2501 | unsigned long flags; |
2502 | int dolock; | ||
2464 | 2503 | ||
2465 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2504 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2466 | return NULL; | 2505 | return NULL; |
2467 | 2506 | ||
2507 | dolock = rb_ok_to_lock(); | ||
2468 | again: | 2508 | again: |
2469 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2509 | local_irq_save(flags); |
2510 | if (dolock) | ||
2511 | spin_lock(&cpu_buffer->reader_lock); | ||
2470 | event = rb_buffer_peek(buffer, cpu, ts); | 2512 | event = rb_buffer_peek(buffer, cpu, ts); |
2471 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2513 | if (dolock) |
2514 | spin_unlock(&cpu_buffer->reader_lock); | ||
2515 | local_irq_restore(flags); | ||
2472 | 2516 | ||
2473 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 2517 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2474 | cpu_relax(); | 2518 | cpu_relax(); |
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2520 | struct ring_buffer_per_cpu *cpu_buffer; | 2564 | struct ring_buffer_per_cpu *cpu_buffer; |
2521 | struct ring_buffer_event *event = NULL; | 2565 | struct ring_buffer_event *event = NULL; |
2522 | unsigned long flags; | 2566 | unsigned long flags; |
2567 | int dolock; | ||
2568 | |||
2569 | dolock = rb_ok_to_lock(); | ||
2523 | 2570 | ||
2524 | again: | 2571 | again: |
2525 | /* might be called in atomic */ | 2572 | /* might be called in atomic */ |
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2529 | goto out; | 2576 | goto out; |
2530 | 2577 | ||
2531 | cpu_buffer = buffer->buffers[cpu]; | 2578 | cpu_buffer = buffer->buffers[cpu]; |
2532 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2579 | local_irq_save(flags); |
2580 | if (dolock) | ||
2581 | spin_lock(&cpu_buffer->reader_lock); | ||
2533 | 2582 | ||
2534 | event = rb_buffer_peek(buffer, cpu, ts); | 2583 | event = rb_buffer_peek(buffer, cpu, ts); |
2535 | if (!event) | 2584 | if (!event) |
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2538 | rb_advance_reader(cpu_buffer); | 2587 | rb_advance_reader(cpu_buffer); |
2539 | 2588 | ||
2540 | out_unlock: | 2589 | out_unlock: |
2541 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2590 | if (dolock) |
2591 | spin_unlock(&cpu_buffer->reader_lock); | ||
2592 | local_irq_restore(flags); | ||
2542 | 2593 | ||
2543 | out: | 2594 | out: |
2544 | preempt_enable(); | 2595 | preempt_enable(); |
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2680 | cpu_buffer->overrun = 0; | 2731 | cpu_buffer->overrun = 0; |
2681 | cpu_buffer->read = 0; | 2732 | cpu_buffer->read = 0; |
2682 | local_set(&cpu_buffer->entries, 0); | 2733 | local_set(&cpu_buffer->entries, 0); |
2734 | local_set(&cpu_buffer->committing, 0); | ||
2735 | local_set(&cpu_buffer->commits, 0); | ||
2683 | 2736 | ||
2684 | cpu_buffer->write_stamp = 0; | 2737 | cpu_buffer->write_stamp = 0; |
2685 | cpu_buffer->read_stamp = 0; | 2738 | cpu_buffer->read_stamp = 0; |
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); | |||
2734 | int ring_buffer_empty(struct ring_buffer *buffer) | 2787 | int ring_buffer_empty(struct ring_buffer *buffer) |
2735 | { | 2788 | { |
2736 | struct ring_buffer_per_cpu *cpu_buffer; | 2789 | struct ring_buffer_per_cpu *cpu_buffer; |
2790 | unsigned long flags; | ||
2791 | int dolock; | ||
2737 | int cpu; | 2792 | int cpu; |
2793 | int ret; | ||
2794 | |||
2795 | dolock = rb_ok_to_lock(); | ||
2738 | 2796 | ||
2739 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 2797 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
2740 | for_each_buffer_cpu(buffer, cpu) { | 2798 | for_each_buffer_cpu(buffer, cpu) { |
2741 | cpu_buffer = buffer->buffers[cpu]; | 2799 | cpu_buffer = buffer->buffers[cpu]; |
2742 | if (!rb_per_cpu_empty(cpu_buffer)) | 2800 | local_irq_save(flags); |
2801 | if (dolock) | ||
2802 | spin_lock(&cpu_buffer->reader_lock); | ||
2803 | ret = rb_per_cpu_empty(cpu_buffer); | ||
2804 | if (dolock) | ||
2805 | spin_unlock(&cpu_buffer->reader_lock); | ||
2806 | local_irq_restore(flags); | ||
2807 | |||
2808 | if (!ret) | ||
2743 | return 0; | 2809 | return 0; |
2744 | } | 2810 | } |
2745 | 2811 | ||
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
2755 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2821 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2756 | { | 2822 | { |
2757 | struct ring_buffer_per_cpu *cpu_buffer; | 2823 | struct ring_buffer_per_cpu *cpu_buffer; |
2824 | unsigned long flags; | ||
2825 | int dolock; | ||
2758 | int ret; | 2826 | int ret; |
2759 | 2827 | ||
2760 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2828 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2761 | return 1; | 2829 | return 1; |
2762 | 2830 | ||
2831 | dolock = rb_ok_to_lock(); | ||
2832 | |||
2763 | cpu_buffer = buffer->buffers[cpu]; | 2833 | cpu_buffer = buffer->buffers[cpu]; |
2834 | local_irq_save(flags); | ||
2835 | if (dolock) | ||
2836 | spin_lock(&cpu_buffer->reader_lock); | ||
2764 | ret = rb_per_cpu_empty(cpu_buffer); | 2837 | ret = rb_per_cpu_empty(cpu_buffer); |
2765 | 2838 | if (dolock) | |
2839 | spin_unlock(&cpu_buffer->reader_lock); | ||
2840 | local_irq_restore(flags); | ||
2766 | 2841 | ||
2767 | return ret; | 2842 | return ret; |
2768 | } | 2843 | } |
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3108 | switch (action) { | 3183 | switch (action) { |
3109 | case CPU_UP_PREPARE: | 3184 | case CPU_UP_PREPARE: |
3110 | case CPU_UP_PREPARE_FROZEN: | 3185 | case CPU_UP_PREPARE_FROZEN: |
3111 | if (cpu_isset(cpu, *buffer->cpumask)) | 3186 | if (cpumask_test_cpu(cpu, buffer->cpumask)) |
3112 | return NOTIFY_OK; | 3187 | return NOTIFY_OK; |
3113 | 3188 | ||
3114 | buffer->buffers[cpu] = | 3189 | buffer->buffers[cpu] = |
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3119 | return NOTIFY_OK; | 3194 | return NOTIFY_OK; |
3120 | } | 3195 | } |
3121 | smp_wmb(); | 3196 | smp_wmb(); |
3122 | cpu_set(cpu, *buffer->cpumask); | 3197 | cpumask_set_cpu(cpu, buffer->cpumask); |
3123 | break; | 3198 | break; |
3124 | case CPU_DOWN_PREPARE: | 3199 | case CPU_DOWN_PREPARE: |
3125 | case CPU_DOWN_PREPARE_FROZEN: | 3200 | case CPU_DOWN_PREPARE_FROZEN: |