author     Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 13:56:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 13:56:46 -0400
commit     b0b7065b64fe517b4a50915a1555e8ee98890d64 (patch)
tree       950e7735585a83f5b4efe7a9473b5b42d5ca4f57  /kernel/trace/ring_buffer.c
parent     38df92b8cee936334f686c06df0e5fbb92e252df (diff)
parent     d4c4038343510d83727ea922de4435996c26c0c8 (diff)
Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (24 commits)
  tracing/urgent: warn in case of ftrace_start_up inbalance
  tracing/urgent: fix unbalanced ftrace_start_up
  function-graph: add stack frame test
  function-graph: disable when both x86_32 and optimize for size are configured
  ring-buffer: have benchmark test print to trace buffer
  ring-buffer: do not grab locks in nmi
  ring-buffer: add locks around rb_per_cpu_empty
  ring-buffer: check for less than two in size allocation
  ring-buffer: remove useless compile check for buffer_page size
  ring-buffer: remove useless warn on check
  ring-buffer: use BUF_PAGE_HDR_SIZE in calculating index
  tracing: update sample event documentation
  tracing/filters: fix race between filter setting and module unload
  tracing/filters: free filter_string in destroy_preds()
  ring-buffer: use commit counters for commit pointer accounting
  ring-buffer: remove unused variable
  ring-buffer: have benchmark test handle discarded events
  ring-buffer: prevent adding write in discarded area
  tracing/filters: strloc should be unsigned short
  tracing/filters: operand can be negative
  ...

Fix up kmemcheck-induced conflict in kernel/trace/ring_buffer.c manually
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  311
1 file changed, 193 insertions(+), 118 deletions(-)
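The diff below reworks commit accounting: every writer now brackets its reservation with rb_start_commit()/rb_end_commit(), and only the outermost writer (committing == 1) advances the commit pointer, re-checking the commits counter afterwards for events that nested in from interrupts. A minimal user-space sketch of that nesting protocol follows; it is only an illustration, using C11 atomics in place of the kernel's per-CPU local_t operations and omitting the barrier() calls, and every demo_* name is invented for the example:

/* Simplified model of rb_start_commit()/rb_end_commit() nesting; not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

struct demo_cpu_buffer {
	atomic_long committing;	/* depth of writers currently inside a commit */
	atomic_long commits;	/* total commits started, used to detect nested writers */
};

static void demo_set_commit_to_write(struct demo_cpu_buffer *b)
{
	/* stand-in for advancing the real commit pointer */
	printf("commit pointer advanced\n");
}

static void demo_start_commit(struct demo_cpu_buffer *b)
{
	atomic_fetch_add(&b->committing, 1);
	atomic_fetch_add(&b->commits, 1);
}

static void demo_end_commit(struct demo_cpu_buffer *b)
{
	long commits;

again:
	commits = atomic_load(&b->commits);
	/* only the outermost writer pushes the commit pointer forward */
	if (atomic_load(&b->committing) == 1)
		demo_set_commit_to_write(b);

	atomic_fetch_sub(&b->committing, 1);

	/*
	 * A nested writer may have reserved an event between the pointer
	 * update and the decrement; if so, take responsibility for it.
	 */
	if (atomic_load(&b->commits) != commits &&
	    atomic_load(&b->committing) == 0) {
		atomic_fetch_add(&b->committing, 1);
		goto again;
	}
}

int main(void)
{
	struct demo_cpu_buffer buf = { 0, 0 };

	demo_start_commit(&buf);	/* outer writer */
	demo_start_commit(&buf);	/* nested writer, e.g. from an interrupt */
	demo_end_commit(&buf);		/* nested: committing != 1, no advance */
	demo_end_commit(&buf);		/* outer: advances the commit pointer */
	return 0;
}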
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..04dac2638258 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
 #define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */
 
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu {
 	unsigned long			overrun;
 	unsigned long			read;
 	local_t				entries;
+	local_t				committing;
+	local_t				commits;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
-/*
- * Causes compile errors if the struct buffer_page gets bigger
- * than the struct page.
- */
-extern int ring_buffer_page_too_big(void);
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int rb_cpu_notify(struct notifier_block *self,
 			 unsigned long action, void *hcpu);
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	int bsize;
 	int cpu;
 
-	/* Paranoid! Optimizes out when all is well */
-	if (sizeof(struct buffer_page) > sizeof(struct page))
-		ring_buffer_page_too_big();
-
-
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
 			 GFP_KERNEL);
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
-	if (buffer->pages == 1)
-		buffer->pages++;
+	if (buffer->pages < 2)
+		buffer->pages = 2;
 
 	/*
 	 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 
-	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
 static inline int
-rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
 	unsigned long addr = (unsigned long)event;
 	unsigned long index;
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 }
 
 static void
-rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
-		    struct ring_buffer_event *event)
-{
-	unsigned long addr = (unsigned long)event;
-	unsigned long index;
-
-	index = rb_event_index(event);
-	addr &= PAGE_MASK;
-
-	while (cpu_buffer->commit_page->page != (void *)addr) {
-		if (RB_WARN_ON(cpu_buffer,
-			  cpu_buffer->commit_page == cpu_buffer->tail_page))
-			return;
-		cpu_buffer->commit_page->page->commit =
-			cpu_buffer->commit_page->write;
-		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-		cpu_buffer->write_stamp =
-			cpu_buffer->commit_page->page->time_stamp;
-	}
-
-	/* Now set the commit to the event's index */
-	local_set(&cpu_buffer->commit_page->page->commit, index);
-}
-
-static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	/*
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length)
 	return length;
 }
 
+static inline void
+rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	      struct buffer_page *tail_page,
+	      unsigned long tail, unsigned long length)
+{
+	struct ring_buffer_event *event;
+
+	/*
+	 * Only the event that crossed the page boundary
+	 * must fill the old tail_page with padding.
+	 */
+	if (tail >= BUF_PAGE_SIZE) {
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	event = __rb_page_index(tail_page, tail);
+	kmemcheck_annotate_bitfield(event, bitfield);
+
+	/*
+	 * If this event is bigger than the minimum size, then
+	 * we need to be careful that we don't subtract the
+	 * write counter enough to allow another writer to slip
+	 * in on this page.
+	 * We put in a discarded commit instead, to make sure
+	 * that this space is not used again.
+	 *
+	 * If we are less than the minimum size, we don't need to
+	 * worry about it.
+	 */
+	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
+		/* No room for any events */
+
+		/* Mark the rest of the page with padding */
+		rb_event_set_padding(event);
+
+		/* Set the write back to the previous setting */
+		local_sub(length, &tail_page->write);
+		return;
+	}
+
+	/* Put in a discarded event */
+	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	event->time_delta = 1;
+	/* Account for this as an entry */
+	local_inc(&tail_page->entries);
+	local_inc(&cpu_buffer->entries);
+
+	/* Set write to end of buffer */
+	length = (tail + length) - BUF_PAGE_SIZE;
+	local_sub(length, &tail_page->write);
+}
 
 static struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct buffer_page *next_page, *head_page, *reader_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
-	struct ring_buffer_event *event;
 	bool lock_taken = false;
 	unsigned long flags;
 
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page->page->time_stamp = *ts;
 	}
 
-	/*
-	 * The actual tail page has moved forward.
-	 */
-	if (tail < BUF_PAGE_SIZE) {
-		/* Mark the rest of the page with padding */
-		event = __rb_page_index(tail_page, tail);
-		kmemcheck_annotate_bitfield(event, bitfield);
-		rb_event_set_padding(event);
-	}
-
-	/* Set the write back to the previous setting */
-	local_sub(length, &tail_page->write);
-
-	/*
-	 * If this was a commit entry that failed,
-	 * increment that too
-	 */
-	if (tail_page == cpu_buffer->commit_page &&
-	    tail == rb_commit_index(cpu_buffer)) {
-		rb_set_commit_to_write(cpu_buffer);
-	}
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	__raw_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
  out_reset:
 	/* reset write */
-	local_sub(length, &tail_page->write);
+	rb_reset_tail(cpu_buffer, tail_page, tail, length);
 
 	if (likely(lock_taken))
 		__raw_spin_unlock(&cpu_buffer->lock);
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
 	/* We reserved something on the buffer */
 
-	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-		return NULL;
-
 	event = __rb_page_index(tail_page, tail);
 	kmemcheck_annotate_bitfield(event, bitfield);
 	rb_update_event(event, type, length);
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	local_inc(&tail_page->entries);
 
 	/*
-	 * If this is a commit and the tail is zero, then update
-	 * this page's time stamp.
+	 * If this is the first commit on the page, then update
+	 * its timestamp.
 	 */
-	if (!tail && rb_is_commit(cpu_buffer, event))
-		cpu_buffer->commit_page->page->time_stamp = *ts;
+	if (!tail)
+		tail_page->page->time_stamp = *ts;
 
 	return event;
 }
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return -EAGAIN;
 
 	/* Only a commited time event can update the write stamp */
-	if (rb_is_commit(cpu_buffer, event)) {
+	if (rb_event_is_commit(cpu_buffer, event)) {
 		/*
-		 * If this is the first on the page, then we need to
-		 * update the page itself, and just put in a zero.
+		 * If this is the first on the page, then it was
+		 * updated with the page itself. Try to discard it
+		 * and if we can't just make it zero.
 		 */
 		if (rb_event_index(event)) {
 			event->time_delta = *delta & TS_MASK;
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
-			cpu_buffer->commit_page->page->time_stamp = *ts;
 			/* try to discard, since we do not need this */
 			if (!rb_try_to_discard(cpu_buffer, event)) {
 				/* nope, just zero it */
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 	return ret;
 }
 
+static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	local_inc(&cpu_buffer->committing);
+	local_inc(&cpu_buffer->commits);
+}
+
+static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	unsigned long commits;
+
+	if (RB_WARN_ON(cpu_buffer,
+		       !local_read(&cpu_buffer->committing)))
+		return;
+
+ again:
+	commits = local_read(&cpu_buffer->commits);
+	/* synchronize with interrupts */
+	barrier();
+	if (local_read(&cpu_buffer->committing) == 1)
+		rb_set_commit_to_write(cpu_buffer);
+
+	local_dec(&cpu_buffer->committing);
+
+	/* synchronize with interrupts */
+	barrier();
+
+	/*
+	 * Need to account for interrupts coming in between the
+	 * updating of the commit page and the clearing of the
+	 * committing counter.
+	 */
+	if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
+	    !local_read(&cpu_buffer->committing)) {
+		local_inc(&cpu_buffer->committing);
+		goto again;
+	}
+}
+
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		      unsigned long length)
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	int commit = 0;
 	int nr_loops = 0;
 
+	rb_start_commit(cpu_buffer);
+
 	length = rb_calculate_event_length(length);
  again:
 	/*
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * Bail!
 	 */
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
-		return NULL;
+		goto out_fail;
 
 	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
 		if (commit == -EBUSY)
-			return NULL;
+			goto out_fail;
 
 		if (commit == -EAGAIN)
 			goto again;
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
-	if (!event) {
-		if (unlikely(commit))
-			/*
-			 * Ouch! We needed a timestamp and it was commited. But
-			 * we didn't get our event reserved.
-			 */
-			rb_set_commit_to_write(cpu_buffer);
-		return NULL;
-	}
+	if (!event)
+		goto out_fail;
 
-	/*
-	 * If the timestamp was commited, make the commit our entry
-	 * now so that we will update it when needed.
-	 */
-	if (unlikely(commit))
-		rb_set_commit_event(cpu_buffer, event);
-	else if (!rb_is_commit(cpu_buffer, event))
+	if (!rb_event_is_commit(cpu_buffer, event))
 		delta = 0;
 
 	event->time_delta = delta;
 
 	return event;
+
+ out_fail:
+	rb_end_commit(cpu_buffer);
+	return NULL;
 }
 
 #define TRACE_RECURSIVE_DEPTH 16
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	local_inc(&cpu_buffer->entries);
 
-	/* Only process further if we own the commit */
-	if (!rb_is_commit(cpu_buffer, event))
-		return;
-
-	cpu_buffer->write_stamp += event->time_delta;
+	/*
+	 * The event first in the commit queue updates the
+	 * time stamp.
+	 */
+	if (rb_event_is_commit(cpu_buffer, event))
+		cpu_buffer->write_stamp += event->time_delta;
 
-	rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 }
 
 /**
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	/* The event is discarded regardless */
 	rb_event_discard(event);
 
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
 	/*
 	 * This must only be called if the event has not been
 	 * committed yet. Thus we can assume that preemption
 	 * is still disabled.
 	 */
-	RB_WARN_ON(buffer, preemptible());
-
-	cpu = smp_processor_id();
-	cpu_buffer = buffer->buffers[cpu];
+	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
 
 	if (!rb_try_to_discard(cpu_buffer, event))
 		goto out;
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 */
 	local_inc(&cpu_buffer->entries);
  out:
-	/*
-	 * If a write came in and pushed the tail page
-	 * we still need to update the commit pointer
-	 * if we were the commit.
-	 */
-	if (rb_is_commit(cpu_buffer, event))
-		rb_set_commit_to_write(cpu_buffer);
+	rb_end_commit(cpu_buffer);
 
 	trace_recursive_unlock();
 
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
+static inline int rb_ok_to_lock(void)
+{
+	/*
+	 * If an NMI die dumps out the content of the ring buffer
+	 * do not grab locks. We also permanently disable the ring
+	 * buffer too. A one time deal is all you get from reading
+	 * the ring buffer from an NMI.
+	 */
+	if (likely(!in_nmi() && !oops_in_progress))
+		return 1;
+
+	tracing_off_permanent();
+	return 0;
+}
+
 /**
  * ring_buffer_peek - peek at the next event to be read
  * @buffer: The ring buffer to read
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	int dolock;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
+	dolock = rb_ok_to_lock();
  again:
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	event = rb_buffer_peek(buffer, cpu, ts);
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
 	unsigned long flags;
+	int dolock;
+
+	dolock = rb_ok_to_lock();
 
  again:
 	/* might be called in atomic */
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
 	if (!event)
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	rb_advance_reader(cpu_buffer);
 
  out_unlock:
-	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
  out:
 	preempt_enable();
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->overrun = 0;
 	cpu_buffer->read = 0;
 	local_set(&cpu_buffer->entries, 0);
+	local_set(&cpu_buffer->committing, 0);
+	local_set(&cpu_buffer->commits, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
 int ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int cpu;
+	int ret;
+
+	dolock = rb_ok_to_lock();
 
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		if (!rb_per_cpu_empty(cpu_buffer))
+		local_irq_save(flags);
+		if (dolock)
+			spin_lock(&cpu_buffer->reader_lock);
+		ret = rb_per_cpu_empty(cpu_buffer);
+		if (dolock)
+			spin_unlock(&cpu_buffer->reader_lock);
+		local_irq_restore(flags);
+
+		if (!ret)
 			return 0;
 	}
 
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+	int dolock;
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
+	dolock = rb_ok_to_lock();
+
 	cpu_buffer = buffer->buffers[cpu];
+	local_irq_save(flags);
+	if (dolock)
+		spin_lock(&cpu_buffer->reader_lock);
 	ret = rb_per_cpu_empty(cpu_buffer);
-
+	if (dolock)
+		spin_unlock(&cpu_buffer->reader_lock);
+	local_irq_restore(flags);
 
 	return ret;
 }
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (cpu_isset(cpu, *buffer->cpumask))
+		if (cpumask_test_cpu(cpu, buffer->cpumask))
 			return NOTIFY_OK;
 
 		buffer->buffers[cpu] =
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 			return NOTIFY_OK;
 		}
 		smp_wmb();
-		cpu_set(cpu, *buffer->cpumask);
+		cpumask_set_cpu(cpu, buffer->cpumask);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
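
The reader-side hunks above ('ring-buffer: do not grab locks in nmi') all share one pattern: compute dolock once via rb_ok_to_lock(), disable interrupts, and take reader_lock only when the reader is not in NMI or oops context; otherwise tracing is turned off permanently and the buffer is read locklessly, one time only. A rough user-space analogue of that pattern is sketched below; a pthread mutex stands in for reader_lock, a plain flag for in_nmi()/oops_in_progress, and every demo_* name is hypothetical:

/* Sketch of the "dolock" pattern used by ring_buffer_peek()/consume(); not kernel code. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reader_lock = PTHREAD_MUTEX_INITIALIZER;
static int demo_in_nmi;		/* stand-in for in_nmi() || oops_in_progress */
static int tracing_disabled;	/* stand-in for tracing_off_permanent() */

static int demo_ok_to_lock(void)
{
	/* from NMI/oops context, never block on the lock; disable further tracing */
	if (!demo_in_nmi)
		return 1;

	tracing_disabled = 1;
	return 0;
}

static int demo_peek(void)
{
	int dolock = demo_ok_to_lock();
	int event = 42;		/* pretend this was read from the buffer */

	/* the kernel disables interrupts here (local_irq_save) */
	if (dolock)
		pthread_mutex_lock(&reader_lock);

	/* ... the rb_buffer_peek() equivalent would run here ... */

	if (dolock)
		pthread_mutex_unlock(&reader_lock);
	/* local_irq_restore() in the kernel */

	return event;
}

int main(void)
{
	printf("normal context: %d\n", demo_peek());
	demo_in_nmi = 1;
	printf("nmi-like context (lockless): %d\n", demo_peek());
	printf("tracing_disabled=%d\n", tracing_disabled);
	return 0;
}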