author      Linus Torvalds <torvalds@linux-foundation.org>    2009-06-20 13:56:46 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>    2009-06-20 13:56:46 -0400
commit      b0b7065b64fe517b4a50915a1555e8ee98890d64 (patch)
tree        950e7735585a83f5b4efe7a9473b5b42d5ca4f57 /kernel
parent      38df92b8cee936334f686c06df0e5fbb92e252df (diff)
parent      d4c4038343510d83727ea922de4435996c26c0c8 (diff)
Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (24 commits)
tracing/urgent: warn in case of ftrace_start_up inbalance
tracing/urgent: fix unbalanced ftrace_start_up
function-graph: add stack frame test
function-graph: disable when both x86_32 and optimize for size are configured
ring-buffer: have benchmark test print to trace buffer
ring-buffer: do not grab locks in nmi
ring-buffer: add locks around rb_per_cpu_empty
ring-buffer: check for less than two in size allocation
ring-buffer: remove useless compile check for buffer_page size
ring-buffer: remove useless warn on check
ring-buffer: use BUF_PAGE_HDR_SIZE in calculating index
tracing: update sample event documentation
tracing/filters: fix race between filter setting and module unload
tracing/filters: free filter_string in destroy_preds()
ring-buffer: use commit counters for commit pointer accounting
ring-buffer: remove unused variable
ring-buffer: have benchmark test handle discarded events
ring-buffer: prevent adding write in discarded area
tracing/filters: strloc should be unsigned short
tracing/filters: operand can be negative
...
Fix up kmemcheck-induced conflict in kernel/trace/ring_buffer.c manually
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/trace/Kconfig                  |    8
-rw-r--r--   kernel/trace/ftrace.c                 |    7
-rw-r--r--   kernel/trace/kmemtrace.c              |    2
-rw-r--r--   kernel/trace/ring_buffer.c            |  311
-rw-r--r--   kernel/trace/ring_buffer_benchmark.c  |   45
-rw-r--r--   kernel/trace/trace.c                  |    8
-rw-r--r--   kernel/trace/trace_events_filter.c    |   37
-rw-r--r--   kernel/trace/trace_functions.c        |    8
-rw-r--r--   kernel/trace/trace_functions_graph.c  |   36
9 files changed, 292 insertions, 170 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61071fecc82e..1551f47e7669 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER | |||
18 | config HAVE_FUNCTION_GRAPH_TRACER | 18 | config HAVE_FUNCTION_GRAPH_TRACER |
19 | bool | 19 | bool |
20 | 20 | ||
21 | config HAVE_FUNCTION_GRAPH_FP_TEST | ||
22 | bool | ||
23 | help | ||
24 | An arch may pass in a unique value (frame pointer) to both the | ||
25 | entering and exiting of a function. On exit, the value is compared | ||
26 | and if it does not match, then it will panic the kernel. | ||
27 | |||
21 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 28 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
22 | bool | 29 | bool |
23 | help | 30 | help |
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER | |||
121 | bool "Kernel Function Graph Tracer" | 128 | bool "Kernel Function Graph Tracer" |
122 | depends on HAVE_FUNCTION_GRAPH_TRACER | 129 | depends on HAVE_FUNCTION_GRAPH_TRACER |
123 | depends on FUNCTION_TRACER | 130 | depends on FUNCTION_TRACER |
131 | depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE | ||
124 | default y | 132 | default y |
125 | help | 133 | help |
126 | Enable the kernel to trace a function at both its return | 134 | Enable the kernel to trace a function at both its return |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732ade0c..3718d55fb4c3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command) | |||
1224 | return; | 1224 | return; |
1225 | 1225 | ||
1226 | ftrace_start_up--; | 1226 | ftrace_start_up--; |
1227 | /* | ||
1228 | * Just warn in case of unbalance, no need to kill ftrace, it's not | ||
1229 | * critical but the ftrace_call callers may be never nopped again after | ||
1230 | * further ftrace uses. | ||
1231 | */ | ||
1232 | WARN_ON_ONCE(ftrace_start_up < 0); | ||
1233 | |||
1227 | if (!ftrace_start_up) | 1234 | if (!ftrace_start_up) |
1228 | command |= FTRACE_DISABLE_CALLS; | 1235 | command |= FTRACE_DISABLE_CALLS; |
1229 | 1236 | ||
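
The ftrace.c change above only warns when ftrace_start_up drops below zero instead of failing hard; per the commit message, the worst outcome of such an imbalance is that the ftrace_call sites may never be nopped again. A rough stand-alone sketch of that pattern (illustrative names only, not kernel API):

    /*
     * Stand-alone analogue of the ftrace_start_up imbalance handling:
     * an unbalanced stop is reported once, but tracing keeps working.
     * All names are illustrative, not kernel API.
     */
    #include <stdio.h>

    static int start_up;       /* how many users enabled the call sites */
    static int warned;         /* warn about an imbalance only once     */

    static void tracer_start(void)
    {
            if (++start_up == 1)
                    puts("FTRACE_ENABLE_CALLS");
    }

    static void tracer_stop(void)
    {
            start_up--;
            /* Just warn; do not kill the tracer over an imbalance. */
            if (start_up < 0 && !warned) {
                    warned = 1;
                    fprintf(stderr, "WARN: unbalanced tracer_stop()\n");
            }
            if (!start_up)
                    puts("FTRACE_DISABLE_CALLS");
    }

    int main(void)
    {
            tracer_start();
            tracer_stop();
            tracer_stop();     /* imbalance: warns once */
            return 0;
    }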
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf671d7e2..1edaa9516e81 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr) | |||
186 | int cpu; | 186 | int cpu; |
187 | kmemtrace_array = tr; | 187 | kmemtrace_array = tr; |
188 | 188 | ||
189 | for_each_cpu_mask(cpu, cpu_possible_map) | 189 | for_each_cpu(cpu, cpu_possible_mask) |
190 | tracing_reset(tr, cpu); | 190 | tracing_reset(tr, cpu); |
191 | 191 | ||
192 | kmemtrace_start_probes(); | 192 | kmemtrace_start_probes(); |
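
The kmemtrace.c hunk is part of the cpumask API migration: for_each_cpu() takes a pointer to a mask rather than a mask by value. The semantics, reduced to a toy user-space macro (the real kernel macro walks the bitmap with cpumask_next(); this only mirrors the behaviour):

    /*
     * Toy rendering of for_each_cpu(cpu, mask): visit every set bit in
     * a CPU mask passed by pointer.  Not the kernel implementation.
     */
    #include <stdio.h>

    #define NR_CPUS 8

    struct cpumask { unsigned long bits; };

    #define for_each_cpu(cpu, maskp)                                  \
            for ((cpu) = 0; (cpu) < NR_CPUS; (cpu)++)                 \
                    if (!((maskp)->bits & (1UL << (cpu)))) ; else

    int main(void)
    {
            struct cpumask possible = { .bits = 0x0f };  /* CPUs 0-3 */
            int cpu;

            for_each_cpu(cpu, &possible)
                    printf("tracing_reset(tr, %d)\n", cpu);
            return 0;
    }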
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..04dac2638258 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 206 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
207 | #define RB_ALIGNMENT 4U | 207 | #define RB_ALIGNMENT 4U |
208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 208 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
209 | #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ | ||
209 | 210 | ||
210 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | 211 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ |
211 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | 212 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu { | |||
415 | unsigned long overrun; | 416 | unsigned long overrun; |
416 | unsigned long read; | 417 | unsigned long read; |
417 | local_t entries; | 418 | local_t entries; |
419 | local_t committing; | ||
420 | local_t commits; | ||
418 | u64 write_stamp; | 421 | u64 write_stamp; |
419 | u64 read_stamp; | 422 | u64 read_stamp; |
420 | atomic_t record_disabled; | 423 | atomic_t record_disabled; |
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
618 | kfree(cpu_buffer); | 621 | kfree(cpu_buffer); |
619 | } | 622 | } |
620 | 623 | ||
621 | /* | ||
622 | * Causes compile errors if the struct buffer_page gets bigger | ||
623 | * than the struct page. | ||
624 | */ | ||
625 | extern int ring_buffer_page_too_big(void); | ||
626 | |||
627 | #ifdef CONFIG_HOTPLUG_CPU | 624 | #ifdef CONFIG_HOTPLUG_CPU |
628 | static int rb_cpu_notify(struct notifier_block *self, | 625 | static int rb_cpu_notify(struct notifier_block *self, |
629 | unsigned long action, void *hcpu); | 626 | unsigned long action, void *hcpu); |
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
646 | int bsize; | 643 | int bsize; |
647 | int cpu; | 644 | int cpu; |
648 | 645 | ||
649 | /* Paranoid! Optimizes out when all is well */ | ||
650 | if (sizeof(struct buffer_page) > sizeof(struct page)) | ||
651 | ring_buffer_page_too_big(); | ||
652 | |||
653 | |||
654 | /* keep it in its own cache line */ | 646 | /* keep it in its own cache line */ |
655 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), | 647 | buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), |
656 | GFP_KERNEL); | 648 | GFP_KERNEL); |
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, | |||
666 | buffer->reader_lock_key = key; | 658 | buffer->reader_lock_key = key; |
667 | 659 | ||
668 | /* need at least two pages */ | 660 | /* need at least two pages */ |
669 | if (buffer->pages == 1) | 661 | if (buffer->pages < 2) |
670 | buffer->pages++; | 662 | buffer->pages = 2; |
671 | 663 | ||
672 | /* | 664 | /* |
673 | * In case of non-hotplug cpu, if the ring-buffer is allocated | 665 | * In case of non-hotplug cpu, if the ring-buffer is allocated |
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event) | |||
1011 | { | 1003 | { |
1012 | unsigned long addr = (unsigned long)event; | 1004 | unsigned long addr = (unsigned long)event; |
1013 | 1005 | ||
1014 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 1006 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1015 | } | 1007 | } |
1016 | 1008 | ||
1017 | static inline int | 1009 | static inline int |
1018 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1010 | rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
1019 | struct ring_buffer_event *event) | 1011 | struct ring_buffer_event *event) |
1020 | { | 1012 | { |
1021 | unsigned long addr = (unsigned long)event; | 1013 | unsigned long addr = (unsigned long)event; |
1022 | unsigned long index; | 1014 | unsigned long index; |
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1029 | } | 1021 | } |
1030 | 1022 | ||
1031 | static void | 1023 | static void |
1032 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | ||
1033 | struct ring_buffer_event *event) | ||
1034 | { | ||
1035 | unsigned long addr = (unsigned long)event; | ||
1036 | unsigned long index; | ||
1037 | |||
1038 | index = rb_event_index(event); | ||
1039 | addr &= PAGE_MASK; | ||
1040 | |||
1041 | while (cpu_buffer->commit_page->page != (void *)addr) { | ||
1042 | if (RB_WARN_ON(cpu_buffer, | ||
1043 | cpu_buffer->commit_page == cpu_buffer->tail_page)) | ||
1044 | return; | ||
1045 | cpu_buffer->commit_page->page->commit = | ||
1046 | cpu_buffer->commit_page->write; | ||
1047 | rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); | ||
1048 | cpu_buffer->write_stamp = | ||
1049 | cpu_buffer->commit_page->page->time_stamp; | ||
1050 | } | ||
1051 | |||
1052 | /* Now set the commit to the event's index */ | ||
1053 | local_set(&cpu_buffer->commit_page->page->commit, index); | ||
1054 | } | ||
1055 | |||
1056 | static void | ||
1057 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1024 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
1058 | { | 1025 | { |
1059 | /* | 1026 | /* |
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
1171 | return length; | 1138 | return length; |
1172 | } | 1139 | } |
1173 | 1140 | ||
1141 | static inline void | ||
1142 | rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | ||
1143 | struct buffer_page *tail_page, | ||
1144 | unsigned long tail, unsigned long length) | ||
1145 | { | ||
1146 | struct ring_buffer_event *event; | ||
1147 | |||
1148 | /* | ||
1149 | * Only the event that crossed the page boundary | ||
1150 | * must fill the old tail_page with padding. | ||
1151 | */ | ||
1152 | if (tail >= BUF_PAGE_SIZE) { | ||
1153 | local_sub(length, &tail_page->write); | ||
1154 | return; | ||
1155 | } | ||
1156 | |||
1157 | event = __rb_page_index(tail_page, tail); | ||
1158 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1159 | |||
1160 | /* | ||
1161 | * If this event is bigger than the minimum size, then | ||
1162 | * we need to be careful that we don't subtract the | ||
1163 | * write counter enough to allow another writer to slip | ||
1164 | * in on this page. | ||
1165 | * We put in a discarded commit instead, to make sure | ||
1166 | * that this space is not used again. | ||
1167 | * | ||
1168 | * If we are less than the minimum size, we don't need to | ||
1169 | * worry about it. | ||
1170 | */ | ||
1171 | if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) { | ||
1172 | /* No room for any events */ | ||
1173 | |||
1174 | /* Mark the rest of the page with padding */ | ||
1175 | rb_event_set_padding(event); | ||
1176 | |||
1177 | /* Set the write back to the previous setting */ | ||
1178 | local_sub(length, &tail_page->write); | ||
1179 | return; | ||
1180 | } | ||
1181 | |||
1182 | /* Put in a discarded event */ | ||
1183 | event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE; | ||
1184 | event->type_len = RINGBUF_TYPE_PADDING; | ||
1185 | /* time delta must be non zero */ | ||
1186 | event->time_delta = 1; | ||
1187 | /* Account for this as an entry */ | ||
1188 | local_inc(&tail_page->entries); | ||
1189 | local_inc(&cpu_buffer->entries); | ||
1190 | |||
1191 | /* Set write to end of buffer */ | ||
1192 | length = (tail + length) - BUF_PAGE_SIZE; | ||
1193 | local_sub(length, &tail_page->write); | ||
1194 | } | ||
1174 | 1195 | ||
1175 | static struct ring_buffer_event * | 1196 | static struct ring_buffer_event * |
1176 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1197 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1180 | { | 1201 | { |
1181 | struct buffer_page *next_page, *head_page, *reader_page; | 1202 | struct buffer_page *next_page, *head_page, *reader_page; |
1182 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1203 | struct ring_buffer *buffer = cpu_buffer->buffer; |
1183 | struct ring_buffer_event *event; | ||
1184 | bool lock_taken = false; | 1204 | bool lock_taken = false; |
1185 | unsigned long flags; | 1205 | unsigned long flags; |
1186 | 1206 | ||
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1265 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1285 | cpu_buffer->tail_page->page->time_stamp = *ts; |
1266 | } | 1286 | } |
1267 | 1287 | ||
1268 | /* | 1288 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1269 | * The actual tail page has moved forward. | ||
1270 | */ | ||
1271 | if (tail < BUF_PAGE_SIZE) { | ||
1272 | /* Mark the rest of the page with padding */ | ||
1273 | event = __rb_page_index(tail_page, tail); | ||
1274 | kmemcheck_annotate_bitfield(event, bitfield); | ||
1275 | rb_event_set_padding(event); | ||
1276 | } | ||
1277 | |||
1278 | /* Set the write back to the previous setting */ | ||
1279 | local_sub(length, &tail_page->write); | ||
1280 | |||
1281 | /* | ||
1282 | * If this was a commit entry that failed, | ||
1283 | * increment that too | ||
1284 | */ | ||
1285 | if (tail_page == cpu_buffer->commit_page && | ||
1286 | tail == rb_commit_index(cpu_buffer)) { | ||
1287 | rb_set_commit_to_write(cpu_buffer); | ||
1288 | } | ||
1289 | 1289 | ||
1290 | __raw_spin_unlock(&cpu_buffer->lock); | 1290 | __raw_spin_unlock(&cpu_buffer->lock); |
1291 | local_irq_restore(flags); | 1291 | local_irq_restore(flags); |
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1295 | 1295 | ||
1296 | out_reset: | 1296 | out_reset: |
1297 | /* reset write */ | 1297 | /* reset write */ |
1298 | local_sub(length, &tail_page->write); | 1298 | rb_reset_tail(cpu_buffer, tail_page, tail, length); |
1299 | 1299 | ||
1300 | if (likely(lock_taken)) | 1300 | if (likely(lock_taken)) |
1301 | __raw_spin_unlock(&cpu_buffer->lock); | 1301 | __raw_spin_unlock(&cpu_buffer->lock); |
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1325 | 1325 | ||
1326 | /* We reserved something on the buffer */ | 1326 | /* We reserved something on the buffer */ |
1327 | 1327 | ||
1328 | if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) | ||
1329 | return NULL; | ||
1330 | |||
1331 | event = __rb_page_index(tail_page, tail); | 1328 | event = __rb_page_index(tail_page, tail); |
1332 | kmemcheck_annotate_bitfield(event, bitfield); | 1329 | kmemcheck_annotate_bitfield(event, bitfield); |
1333 | rb_update_event(event, type, length); | 1330 | rb_update_event(event, type, length); |
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1337 | local_inc(&tail_page->entries); | 1334 | local_inc(&tail_page->entries); |
1338 | 1335 | ||
1339 | /* | 1336 | /* |
1340 | * If this is a commit and the tail is zero, then update | 1337 | * If this is the first commit on the page, then update |
1341 | * this page's time stamp. | 1338 | * its timestamp. |
1342 | */ | 1339 | */ |
1343 | if (!tail && rb_is_commit(cpu_buffer, event)) | 1340 | if (!tail) |
1344 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1341 | tail_page->page->time_stamp = *ts; |
1345 | 1342 | ||
1346 | return event; | 1343 | return event; |
1347 | } | 1344 | } |
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1410 | return -EAGAIN; | 1407 | return -EAGAIN; |
1411 | 1408 | ||
1412 | /* Only a commited time event can update the write stamp */ | 1409 | /* Only a commited time event can update the write stamp */ |
1413 | if (rb_is_commit(cpu_buffer, event)) { | 1410 | if (rb_event_is_commit(cpu_buffer, event)) { |
1414 | /* | 1411 | /* |
1415 | * If this is the first on the page, then we need to | 1412 | * If this is the first on the page, then it was |
1416 | * update the page itself, and just put in a zero. | 1413 | * updated with the page itself. Try to discard it |
1414 | * and if we can't just make it zero. | ||
1417 | */ | 1415 | */ |
1418 | if (rb_event_index(event)) { | 1416 | if (rb_event_index(event)) { |
1419 | event->time_delta = *delta & TS_MASK; | 1417 | event->time_delta = *delta & TS_MASK; |
1420 | event->array[0] = *delta >> TS_SHIFT; | 1418 | event->array[0] = *delta >> TS_SHIFT; |
1421 | } else { | 1419 | } else { |
1422 | cpu_buffer->commit_page->page->time_stamp = *ts; | ||
1423 | /* try to discard, since we do not need this */ | 1420 | /* try to discard, since we do not need this */ |
1424 | if (!rb_try_to_discard(cpu_buffer, event)) { | 1421 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1425 | /* nope, just zero it */ | 1422 | /* nope, just zero it */ |
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1445 | return ret; | 1442 | return ret; |
1446 | } | 1443 | } |
1447 | 1444 | ||
1445 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1446 | { | ||
1447 | local_inc(&cpu_buffer->committing); | ||
1448 | local_inc(&cpu_buffer->commits); | ||
1449 | } | ||
1450 | |||
1451 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | ||
1452 | { | ||
1453 | unsigned long commits; | ||
1454 | |||
1455 | if (RB_WARN_ON(cpu_buffer, | ||
1456 | !local_read(&cpu_buffer->committing))) | ||
1457 | return; | ||
1458 | |||
1459 | again: | ||
1460 | commits = local_read(&cpu_buffer->commits); | ||
1461 | /* synchronize with interrupts */ | ||
1462 | barrier(); | ||
1463 | if (local_read(&cpu_buffer->committing) == 1) | ||
1464 | rb_set_commit_to_write(cpu_buffer); | ||
1465 | |||
1466 | local_dec(&cpu_buffer->committing); | ||
1467 | |||
1468 | /* synchronize with interrupts */ | ||
1469 | barrier(); | ||
1470 | |||
1471 | /* | ||
1472 | * Need to account for interrupts coming in between the | ||
1473 | * updating of the commit page and the clearing of the | ||
1474 | * committing counter. | ||
1475 | */ | ||
1476 | if (unlikely(local_read(&cpu_buffer->commits) != commits) && | ||
1477 | !local_read(&cpu_buffer->committing)) { | ||
1478 | local_inc(&cpu_buffer->committing); | ||
1479 | goto again; | ||
1480 | } | ||
1481 | } | ||
1482 | |||
1448 | static struct ring_buffer_event * | 1483 | static struct ring_buffer_event * |
1449 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 1484 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, |
1450 | unsigned long length) | 1485 | unsigned long length) |
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1454 | int commit = 0; | 1489 | int commit = 0; |
1455 | int nr_loops = 0; | 1490 | int nr_loops = 0; |
1456 | 1491 | ||
1492 | rb_start_commit(cpu_buffer); | ||
1493 | |||
1457 | length = rb_calculate_event_length(length); | 1494 | length = rb_calculate_event_length(length); |
1458 | again: | 1495 | again: |
1459 | /* | 1496 | /* |
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1466 | * Bail! | 1503 | * Bail! |
1467 | */ | 1504 | */ |
1468 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1505 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
1469 | return NULL; | 1506 | goto out_fail; |
1470 | 1507 | ||
1471 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 1508 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
1472 | 1509 | ||
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1497 | 1534 | ||
1498 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 1535 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
1499 | if (commit == -EBUSY) | 1536 | if (commit == -EBUSY) |
1500 | return NULL; | 1537 | goto out_fail; |
1501 | 1538 | ||
1502 | if (commit == -EAGAIN) | 1539 | if (commit == -EAGAIN) |
1503 | goto again; | 1540 | goto again; |
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
1511 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 1548 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
1512 | goto again; | 1549 | goto again; |
1513 | 1550 | ||
1514 | if (!event) { | 1551 | if (!event) |
1515 | if (unlikely(commit)) | 1552 | goto out_fail; |
1516 | /* | ||
1517 | * Ouch! We needed a timestamp and it was commited. But | ||
1518 | * we didn't get our event reserved. | ||
1519 | */ | ||
1520 | rb_set_commit_to_write(cpu_buffer); | ||
1521 | return NULL; | ||
1522 | } | ||
1523 | 1553 | ||
1524 | /* | 1554 | if (!rb_event_is_commit(cpu_buffer, event)) |
1525 | * If the timestamp was commited, make the commit our entry | ||
1526 | * now so that we will update it when needed. | ||
1527 | */ | ||
1528 | if (unlikely(commit)) | ||
1529 | rb_set_commit_event(cpu_buffer, event); | ||
1530 | else if (!rb_is_commit(cpu_buffer, event)) | ||
1531 | delta = 0; | 1555 | delta = 0; |
1532 | 1556 | ||
1533 | event->time_delta = delta; | 1557 | event->time_delta = delta; |
1534 | 1558 | ||
1535 | return event; | 1559 | return event; |
1560 | |||
1561 | out_fail: | ||
1562 | rb_end_commit(cpu_buffer); | ||
1563 | return NULL; | ||
1536 | } | 1564 | } |
1537 | 1565 | ||
1538 | #define TRACE_RECURSIVE_DEPTH 16 | 1566 | #define TRACE_RECURSIVE_DEPTH 16 |
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1642 | { | 1670 | { |
1643 | local_inc(&cpu_buffer->entries); | 1671 | local_inc(&cpu_buffer->entries); |
1644 | 1672 | ||
1645 | /* Only process further if we own the commit */ | 1673 | /* |
1646 | if (!rb_is_commit(cpu_buffer, event)) | 1674 | * The event first in the commit queue updates the |
1647 | return; | 1675 | * time stamp. |
1648 | 1676 | */ | |
1649 | cpu_buffer->write_stamp += event->time_delta; | 1677 | if (rb_event_is_commit(cpu_buffer, event)) |
1678 | cpu_buffer->write_stamp += event->time_delta; | ||
1650 | 1679 | ||
1651 | rb_set_commit_to_write(cpu_buffer); | 1680 | rb_end_commit(cpu_buffer); |
1652 | } | 1681 | } |
1653 | 1682 | ||
1654 | /** | 1683 | /** |
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1737 | /* The event is discarded regardless */ | 1766 | /* The event is discarded regardless */ |
1738 | rb_event_discard(event); | 1767 | rb_event_discard(event); |
1739 | 1768 | ||
1769 | cpu = smp_processor_id(); | ||
1770 | cpu_buffer = buffer->buffers[cpu]; | ||
1771 | |||
1740 | /* | 1772 | /* |
1741 | * This must only be called if the event has not been | 1773 | * This must only be called if the event has not been |
1742 | * committed yet. Thus we can assume that preemption | 1774 | * committed yet. Thus we can assume that preemption |
1743 | * is still disabled. | 1775 | * is still disabled. |
1744 | */ | 1776 | */ |
1745 | RB_WARN_ON(buffer, preemptible()); | 1777 | RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); |
1746 | |||
1747 | cpu = smp_processor_id(); | ||
1748 | cpu_buffer = buffer->buffers[cpu]; | ||
1749 | 1778 | ||
1750 | if (!rb_try_to_discard(cpu_buffer, event)) | 1779 | if (!rb_try_to_discard(cpu_buffer, event)) |
1751 | goto out; | 1780 | goto out; |
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1756 | */ | 1785 | */ |
1757 | local_inc(&cpu_buffer->entries); | 1786 | local_inc(&cpu_buffer->entries); |
1758 | out: | 1787 | out: |
1759 | /* | 1788 | rb_end_commit(cpu_buffer); |
1760 | * If a write came in and pushed the tail page | ||
1761 | * we still need to update the commit pointer | ||
1762 | * if we were the commit. | ||
1763 | */ | ||
1764 | if (rb_is_commit(cpu_buffer, event)) | ||
1765 | rb_set_commit_to_write(cpu_buffer); | ||
1766 | 1789 | ||
1767 | trace_recursive_unlock(); | 1790 | trace_recursive_unlock(); |
1768 | 1791 | ||
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2446 | } | 2469 | } |
2447 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); | 2470 | EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); |
2448 | 2471 | ||
2472 | static inline int rb_ok_to_lock(void) | ||
2473 | { | ||
2474 | /* | ||
2475 | * If an NMI die dumps out the content of the ring buffer | ||
2476 | * do not grab locks. We also permanently disable the ring | ||
2477 | * buffer too. A one time deal is all you get from reading | ||
2478 | * the ring buffer from an NMI. | ||
2479 | */ | ||
2480 | if (likely(!in_nmi() && !oops_in_progress)) | ||
2481 | return 1; | ||
2482 | |||
2483 | tracing_off_permanent(); | ||
2484 | return 0; | ||
2485 | } | ||
2486 | |||
2449 | /** | 2487 | /** |
2450 | * ring_buffer_peek - peek at the next event to be read | 2488 | * ring_buffer_peek - peek at the next event to be read |
2451 | * @buffer: The ring buffer to read | 2489 | * @buffer: The ring buffer to read |
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2461 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2499 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2462 | struct ring_buffer_event *event; | 2500 | struct ring_buffer_event *event; |
2463 | unsigned long flags; | 2501 | unsigned long flags; |
2502 | int dolock; | ||
2464 | 2503 | ||
2465 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2504 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2466 | return NULL; | 2505 | return NULL; |
2467 | 2506 | ||
2507 | dolock = rb_ok_to_lock(); | ||
2468 | again: | 2508 | again: |
2469 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2509 | local_irq_save(flags); |
2510 | if (dolock) | ||
2511 | spin_lock(&cpu_buffer->reader_lock); | ||
2470 | event = rb_buffer_peek(buffer, cpu, ts); | 2512 | event = rb_buffer_peek(buffer, cpu, ts); |
2471 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2513 | if (dolock) |
2514 | spin_unlock(&cpu_buffer->reader_lock); | ||
2515 | local_irq_restore(flags); | ||
2472 | 2516 | ||
2473 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { | 2517 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
2474 | cpu_relax(); | 2518 | cpu_relax(); |
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2520 | struct ring_buffer_per_cpu *cpu_buffer; | 2564 | struct ring_buffer_per_cpu *cpu_buffer; |
2521 | struct ring_buffer_event *event = NULL; | 2565 | struct ring_buffer_event *event = NULL; |
2522 | unsigned long flags; | 2566 | unsigned long flags; |
2567 | int dolock; | ||
2568 | |||
2569 | dolock = rb_ok_to_lock(); | ||
2523 | 2570 | ||
2524 | again: | 2571 | again: |
2525 | /* might be called in atomic */ | 2572 | /* might be called in atomic */ |
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2529 | goto out; | 2576 | goto out; |
2530 | 2577 | ||
2531 | cpu_buffer = buffer->buffers[cpu]; | 2578 | cpu_buffer = buffer->buffers[cpu]; |
2532 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2579 | local_irq_save(flags); |
2580 | if (dolock) | ||
2581 | spin_lock(&cpu_buffer->reader_lock); | ||
2533 | 2582 | ||
2534 | event = rb_buffer_peek(buffer, cpu, ts); | 2583 | event = rb_buffer_peek(buffer, cpu, ts); |
2535 | if (!event) | 2584 | if (!event) |
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2538 | rb_advance_reader(cpu_buffer); | 2587 | rb_advance_reader(cpu_buffer); |
2539 | 2588 | ||
2540 | out_unlock: | 2589 | out_unlock: |
2541 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2590 | if (dolock) |
2591 | spin_unlock(&cpu_buffer->reader_lock); | ||
2592 | local_irq_restore(flags); | ||
2542 | 2593 | ||
2543 | out: | 2594 | out: |
2544 | preempt_enable(); | 2595 | preempt_enable(); |
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
2680 | cpu_buffer->overrun = 0; | 2731 | cpu_buffer->overrun = 0; |
2681 | cpu_buffer->read = 0; | 2732 | cpu_buffer->read = 0; |
2682 | local_set(&cpu_buffer->entries, 0); | 2733 | local_set(&cpu_buffer->entries, 0); |
2734 | local_set(&cpu_buffer->committing, 0); | ||
2735 | local_set(&cpu_buffer->commits, 0); | ||
2683 | 2736 | ||
2684 | cpu_buffer->write_stamp = 0; | 2737 | cpu_buffer->write_stamp = 0; |
2685 | cpu_buffer->read_stamp = 0; | 2738 | cpu_buffer->read_stamp = 0; |
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); | |||
2734 | int ring_buffer_empty(struct ring_buffer *buffer) | 2787 | int ring_buffer_empty(struct ring_buffer *buffer) |
2735 | { | 2788 | { |
2736 | struct ring_buffer_per_cpu *cpu_buffer; | 2789 | struct ring_buffer_per_cpu *cpu_buffer; |
2790 | unsigned long flags; | ||
2791 | int dolock; | ||
2737 | int cpu; | 2792 | int cpu; |
2793 | int ret; | ||
2794 | |||
2795 | dolock = rb_ok_to_lock(); | ||
2738 | 2796 | ||
2739 | /* yes this is racy, but if you don't like the race, lock the buffer */ | 2797 | /* yes this is racy, but if you don't like the race, lock the buffer */ |
2740 | for_each_buffer_cpu(buffer, cpu) { | 2798 | for_each_buffer_cpu(buffer, cpu) { |
2741 | cpu_buffer = buffer->buffers[cpu]; | 2799 | cpu_buffer = buffer->buffers[cpu]; |
2742 | if (!rb_per_cpu_empty(cpu_buffer)) | 2800 | local_irq_save(flags); |
2801 | if (dolock) | ||
2802 | spin_lock(&cpu_buffer->reader_lock); | ||
2803 | ret = rb_per_cpu_empty(cpu_buffer); | ||
2804 | if (dolock) | ||
2805 | spin_unlock(&cpu_buffer->reader_lock); | ||
2806 | local_irq_restore(flags); | ||
2807 | |||
2808 | if (!ret) | ||
2743 | return 0; | 2809 | return 0; |
2744 | } | 2810 | } |
2745 | 2811 | ||
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
2755 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2821 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
2756 | { | 2822 | { |
2757 | struct ring_buffer_per_cpu *cpu_buffer; | 2823 | struct ring_buffer_per_cpu *cpu_buffer; |
2824 | unsigned long flags; | ||
2825 | int dolock; | ||
2758 | int ret; | 2826 | int ret; |
2759 | 2827 | ||
2760 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2828 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2761 | return 1; | 2829 | return 1; |
2762 | 2830 | ||
2831 | dolock = rb_ok_to_lock(); | ||
2832 | |||
2763 | cpu_buffer = buffer->buffers[cpu]; | 2833 | cpu_buffer = buffer->buffers[cpu]; |
2834 | local_irq_save(flags); | ||
2835 | if (dolock) | ||
2836 | spin_lock(&cpu_buffer->reader_lock); | ||
2764 | ret = rb_per_cpu_empty(cpu_buffer); | 2837 | ret = rb_per_cpu_empty(cpu_buffer); |
2765 | 2838 | if (dolock) | |
2839 | spin_unlock(&cpu_buffer->reader_lock); | ||
2840 | local_irq_restore(flags); | ||
2766 | 2841 | ||
2767 | return ret; | 2842 | return ret; |
2768 | } | 2843 | } |
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3108 | switch (action) { | 3183 | switch (action) { |
3109 | case CPU_UP_PREPARE: | 3184 | case CPU_UP_PREPARE: |
3110 | case CPU_UP_PREPARE_FROZEN: | 3185 | case CPU_UP_PREPARE_FROZEN: |
3111 | if (cpu_isset(cpu, *buffer->cpumask)) | 3186 | if (cpumask_test_cpu(cpu, buffer->cpumask)) |
3112 | return NOTIFY_OK; | 3187 | return NOTIFY_OK; |
3113 | 3188 | ||
3114 | buffer->buffers[cpu] = | 3189 | buffer->buffers[cpu] = |
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
3119 | return NOTIFY_OK; | 3194 | return NOTIFY_OK; |
3120 | } | 3195 | } |
3121 | smp_wmb(); | 3196 | smp_wmb(); |
3122 | cpu_set(cpu, *buffer->cpumask); | 3197 | cpumask_set_cpu(cpu, buffer->cpumask); |
3123 | break; | 3198 | break; |
3124 | case CPU_DOWN_PREPARE: | 3199 | case CPU_DOWN_PREPARE: |
3125 | case CPU_DOWN_PREPARE_FROZEN: | 3200 | case CPU_DOWN_PREPARE_FROZEN: |
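
The largest part of the ring_buffer.c diff replaces the old "commit page ownership" logic (rb_set_commit_event(), rb_is_commit()) with two per-cpu counters, committing and commits, managed by rb_start_commit()/rb_end_commit(). A simplified single-CPU model of that accounting, with the memory barriers and local_t details left out and illustrative names throughout:

    /*
     * Simplified, single-CPU model of the new commit bookkeeping:
     * "committing" is the nesting depth of writers, "commits" counts
     * reservations, and only the outermost writer publishes the commit
     * pointer.  Barriers and per-cpu details are omitted.
     */
    #include <stdio.h>

    static long committing;    /* nested writers on this CPU          */
    static long commits;       /* events reserved so far              */
    static long commit_ptr;    /* what readers are allowed to see     */
    static long write_ptr;     /* where the next event will go        */

    static void start_commit(void)      /* rb_start_commit() + reserve */
    {
            committing++;
            commits++;
            write_ptr++;
    }

    static void end_commit(void)        /* rb_end_commit()             */
    {
            long seen;
    again:
            seen = commits;
            if (committing == 1)
                    commit_ptr = write_ptr;   /* rb_set_commit_to_write() */
            committing--;
            /*
             * An interrupt may have reserved between the publish above
             * and the decrement; if so, take another pass for it.
             */
            if (commits != seen && !committing) {
                    committing++;
                    goto again;
            }
    }

    int main(void)
    {
            start_commit();             /* outer event                  */
            start_commit();             /* nested event, e.g. from IRQ  */
            end_commit();               /* inner end: nothing published */
            end_commit();               /* outer end: commit == write   */
            printf("commit=%ld write=%ld\n", commit_ptr, write_ptr);
            return 0;
    }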
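
The same file also stops taking the reader lock when the buffer is dumped from NMI or oops context (rb_ok_to_lock()) and permanently disables the buffer afterwards, so an NMI die dump is a one-shot, lockless read. A stand-alone sketch of that decision, with the context checks stubbed out as plain variables:

    /*
     * Sketch of the rb_ok_to_lock() decision: readers take the
     * reader_lock only when not in NMI/oops context; otherwise they
     * read unlocked once and the buffer is disabled for good.
     */
    #include <stdbool.h>
    #include <stdio.h>

    static bool nmi_context;         /* stand-in for in_nmi()          */
    static bool oops_in_progress;    /* same name as the kernel's flag */
    static bool buffer_enabled = true;

    static int ok_to_lock(void)
    {
            if (!nmi_context && !oops_in_progress)
                    return 1;
            buffer_enabled = false;  /* tracing_off_permanent()        */
            return 0;
    }

    static void peek(void)
    {
            int dolock = ok_to_lock();

            /* local_irq_save() would go here */
            if (dolock)
                    puts("  take reader_lock");
            puts("  rb_buffer_peek()");
            if (dolock)
                    puts("  drop reader_lock");
            /* local_irq_restore() */
    }

    int main(void)
    {
            puts("normal context:");
            peek();
            nmi_context = true;
            puts("NMI dump:");
            peek();                  /* lockless, one-shot read        */
            printf("buffer still enabled: %d\n", buffer_enabled);
            return 0;
    }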
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e149a8b3..573d3cc762c3 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu) | |||
102 | event = (void *)&rpage->data[i]; | 102 | event = (void *)&rpage->data[i]; |
103 | switch (event->type_len) { | 103 | switch (event->type_len) { |
104 | case RINGBUF_TYPE_PADDING: | 104 | case RINGBUF_TYPE_PADDING: |
105 | /* We don't expect any padding */ | 105 | /* failed writes may be discarded events */ |
106 | KILL_TEST(); | 106 | if (!event->time_delta) |
107 | KILL_TEST(); | ||
108 | inc = event->array[0] + 4; | ||
107 | break; | 109 | break; |
108 | case RINGBUF_TYPE_TIME_EXTEND: | 110 | case RINGBUF_TYPE_TIME_EXTEND: |
109 | inc = 8; | 111 | inc = 8; |
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu) | |||
119 | KILL_TEST(); | 121 | KILL_TEST(); |
120 | break; | 122 | break; |
121 | } | 123 | } |
122 | inc = event->array[0]; | 124 | inc = event->array[0] + 4; |
123 | break; | 125 | break; |
124 | default: | 126 | default: |
125 | entry = ring_buffer_event_data(event); | 127 | entry = ring_buffer_event_data(event); |
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void) | |||
201 | * Hammer the buffer for 10 secs (this may | 203 | * Hammer the buffer for 10 secs (this may |
202 | * make the system stall) | 204 | * make the system stall) |
203 | */ | 205 | */ |
204 | pr_info("Starting ring buffer hammer\n"); | 206 | trace_printk("Starting ring buffer hammer\n"); |
205 | do_gettimeofday(&start_tv); | 207 | do_gettimeofday(&start_tv); |
206 | do { | 208 | do { |
207 | struct ring_buffer_event *event; | 209 | struct ring_buffer_event *event; |
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void) | |||
237 | #endif | 239 | #endif |
238 | 240 | ||
239 | } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); | 241 | } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); |
240 | pr_info("End ring buffer hammer\n"); | 242 | trace_printk("End ring buffer hammer\n"); |
241 | 243 | ||
242 | if (consumer) { | 244 | if (consumer) { |
243 | /* Init both completions here to avoid races */ | 245 | /* Init both completions here to avoid races */ |
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void) | |||
260 | overruns = ring_buffer_overruns(buffer); | 262 | overruns = ring_buffer_overruns(buffer); |
261 | 263 | ||
262 | if (kill_test) | 264 | if (kill_test) |
263 | pr_info("ERROR!\n"); | 265 | trace_printk("ERROR!\n"); |
264 | pr_info("Time: %lld (usecs)\n", time); | 266 | trace_printk("Time: %lld (usecs)\n", time); |
265 | pr_info("Overruns: %lld\n", overruns); | 267 | trace_printk("Overruns: %lld\n", overruns); |
266 | if (disable_reader) | 268 | if (disable_reader) |
267 | pr_info("Read: (reader disabled)\n"); | 269 | trace_printk("Read: (reader disabled)\n"); |
268 | else | 270 | else |
269 | pr_info("Read: %ld (by %s)\n", read, | 271 | trace_printk("Read: %ld (by %s)\n", read, |
270 | read_events ? "events" : "pages"); | 272 | read_events ? "events" : "pages"); |
271 | pr_info("Entries: %lld\n", entries); | 273 | trace_printk("Entries: %lld\n", entries); |
272 | pr_info("Total: %lld\n", entries + overruns + read); | 274 | trace_printk("Total: %lld\n", entries + overruns + read); |
273 | pr_info("Missed: %ld\n", missed); | 275 | trace_printk("Missed: %ld\n", missed); |
274 | pr_info("Hit: %ld\n", hit); | 276 | trace_printk("Hit: %ld\n", hit); |
275 | 277 | ||
276 | /* Convert time from usecs to millisecs */ | 278 | /* Convert time from usecs to millisecs */ |
277 | do_div(time, USEC_PER_MSEC); | 279 | do_div(time, USEC_PER_MSEC); |
278 | if (time) | 280 | if (time) |
279 | hit /= (long)time; | 281 | hit /= (long)time; |
280 | else | 282 | else |
281 | pr_info("TIME IS ZERO??\n"); | 283 | trace_printk("TIME IS ZERO??\n"); |
282 | 284 | ||
283 | pr_info("Entries per millisec: %ld\n", hit); | 285 | trace_printk("Entries per millisec: %ld\n", hit); |
284 | 286 | ||
285 | if (hit) { | 287 | if (hit) { |
286 | /* Calculate the average time in nanosecs */ | 288 | /* Calculate the average time in nanosecs */ |
287 | avg = NSEC_PER_MSEC / hit; | 289 | avg = NSEC_PER_MSEC / hit; |
288 | pr_info("%ld ns per entry\n", avg); | 290 | trace_printk("%ld ns per entry\n", avg); |
289 | } | 291 | } |
290 | 292 | ||
291 | if (missed) { | 293 | if (missed) { |
292 | if (time) | 294 | if (time) |
293 | missed /= (long)time; | 295 | missed /= (long)time; |
294 | 296 | ||
295 | pr_info("Total iterations per millisec: %ld\n", hit + missed); | 297 | trace_printk("Total iterations per millisec: %ld\n", |
298 | hit + missed); | ||
296 | 299 | ||
297 | /* it is possible that hit + missed will overflow and be zero */ | 300 | /* it is possible that hit + missed will overflow and be zero */ |
298 | if (!(hit + missed)) { | 301 | if (!(hit + missed)) { |
299 | pr_info("hit + missed overflowed and totalled zero!\n"); | 302 | trace_printk("hit + missed overflowed and totalled zero!\n"); |
300 | hit--; /* make it non zero */ | 303 | hit--; /* make it non zero */ |
301 | } | 304 | } |
302 | 305 | ||
303 | /* Caculate the average time in nanosecs */ | 306 | /* Caculate the average time in nanosecs */ |
304 | avg = NSEC_PER_MSEC / (hit + missed); | 307 | avg = NSEC_PER_MSEC / (hit + missed); |
305 | pr_info("%ld ns per entry\n", avg); | 308 | trace_printk("%ld ns per entry\n", avg); |
306 | } | 309 | } |
307 | } | 310 | } |
308 | 311 | ||
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg) | |||
353 | 356 | ||
354 | ring_buffer_producer(); | 357 | ring_buffer_producer(); |
355 | 358 | ||
356 | pr_info("Sleeping for 10 secs\n"); | 359 | trace_printk("Sleeping for 10 secs\n"); |
357 | set_current_state(TASK_INTERRUPTIBLE); | 360 | set_current_state(TASK_INTERRUPTIBLE); |
358 | schedule_timeout(HZ * SLEEP_TIME); | 361 | schedule_timeout(HZ * SLEEP_TIME); |
359 | __set_current_state(TASK_RUNNING); | 362 | __set_current_state(TASK_RUNNING); |
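
The benchmark reader above now treats padding events as possible discarded writes: a padding event with a non-zero time_delta carries its body length in array[0], so the reader advances by that many bytes plus the 4-byte event header. A minimal version of that stepping rule, using a simplified stand-in for ring_buffer_event rather than the real layout:

    /*
     * Minimal stepping rule for padding events: a discarded event
     * (non-zero time_delta) stores its body length in array[0], so the
     * next event starts array[0] + 4 header bytes further on.  The
     * struct below is a simplified stand-in, not the real layout.
     */
    #include <stdio.h>

    struct rb_event {
            unsigned int type_len   : 5;
            unsigned int time_delta : 27;
            unsigned int array[1];       /* body length for padding */
    };

    #define TYPE_PADDING   29u           /* RINGBUF_TYPE_PADDING    */
    #define EVENT_HDR_SIZE  4u

    /* Bytes to skip, or 0 when the rest of the page holds no events. */
    static unsigned int padding_step(const struct rb_event *ev)
    {
            if (ev->type_len != TYPE_PADDING || !ev->time_delta)
                    return 0;
            return ev->array[0] + EVENT_HDR_SIZE;
    }

    int main(void)
    {
            struct rb_event discarded = {
                    .type_len = TYPE_PADDING,
                    .time_delta = 1,
                    .array = { 20 },
            };

            printf("skip %u bytes\n", padding_step(&discarded));
            return 0;
    }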
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1878bfb2e1e..076fa6f0ee48 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2191 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) | 2191 | if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) |
2192 | return -ENOMEM; | 2192 | return -ENOMEM; |
2193 | 2193 | ||
2194 | mutex_lock(&tracing_cpumask_update_lock); | ||
2195 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); | 2194 | err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); |
2196 | if (err) | 2195 | if (err) |
2197 | goto err_unlock; | 2196 | goto err_unlock; |
2198 | 2197 | ||
2198 | mutex_lock(&tracing_cpumask_update_lock); | ||
2199 | |||
2199 | local_irq_disable(); | 2200 | local_irq_disable(); |
2200 | __raw_spin_lock(&ftrace_max_lock); | 2201 | __raw_spin_lock(&ftrace_max_lock); |
2201 | for_each_tracing_cpu(cpu) { | 2202 | for_each_tracing_cpu(cpu) { |
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, | |||
2223 | return count; | 2224 | return count; |
2224 | 2225 | ||
2225 | err_unlock: | 2226 | err_unlock: |
2226 | mutex_unlock(&tracing_cpumask_update_lock); | 2227 | free_cpumask_var(tracing_cpumask_new); |
2227 | free_cpumask_var(tracing_cpumask); | ||
2228 | 2228 | ||
2229 | return err; | 2229 | return err; |
2230 | } | 2230 | } |
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
3626 | struct trace_seq *s; | 3626 | struct trace_seq *s; |
3627 | unsigned long cnt; | 3627 | unsigned long cnt; |
3628 | 3628 | ||
3629 | s = kmalloc(sizeof(*s), GFP_ATOMIC); | 3629 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
3630 | if (!s) | 3630 | if (!s) |
3631 | return ENOMEM; | 3631 | return ENOMEM; |
3632 | 3632 | ||
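
The trace.c change reorders tracing_cpumask_write() so the user buffer is parsed before the update lock is taken, and the error path frees the temporary mask it allocated rather than the live tracing_cpumask. The corrected shape, as a generic user-space rendering with made-up helpers:

    /*
     * Shape of the corrected tracing_cpumask_write() flow: parse into
     * a temporary mask first, take the lock only on success, and free
     * only the temporary mask on error.  Helpers are made up.
     */
    #include <stdio.h>
    #include <stdlib.h>

    static char tracing_mask[64] = "0-3";         /* the live mask */

    static int parse_user_mask(const char *buf, char *dst, size_t len)
    {
            if (!buf || !*buf)
                    return -1;            /* cpumask_parse_user() error */
            snprintf(dst, len, "%s", buf);
            return 0;
    }

    static int cpumask_write(const char *ubuf)
    {
            char *tmp = calloc(1, 64);    /* alloc_cpumask_var()        */
            int err;

            if (!tmp)
                    return -1;

            err = parse_user_mask(ubuf, tmp, 64);  /* parse BEFORE lock */
            if (err)
                    goto out_free;

            /* mutex_lock(); copy tmp into the live mask; mutex_unlock() */
            snprintf(tracing_mask, sizeof(tracing_mask), "%s", tmp);

    out_free:
            free(tmp);                    /* frees the temporary mask   */
            return err;
    }

    int main(void)
    {
            cpumask_write("0,2");
            cpumask_write("");            /* error path: lock never held */
            puts(tracing_mask);
            return 0;
    }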
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54bdb596..936c621bbf46 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@ | |||
27 | #include "trace.h" | 27 | #include "trace.h" |
28 | #include "trace_output.h" | 28 | #include "trace_output.h" |
29 | 29 | ||
30 | static DEFINE_MUTEX(filter_mutex); | ||
31 | |||
32 | enum filter_op_ids | 30 | enum filter_op_ids |
33 | { | 31 | { |
34 | OP_OR, | 32 | OP_OR, |
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event, | |||
178 | static int filter_pred_strloc(struct filter_pred *pred, void *event, | 176 | static int filter_pred_strloc(struct filter_pred *pred, void *event, |
179 | int val1, int val2) | 177 | int val1, int val2) |
180 | { | 178 | { |
181 | int str_loc = *(int *)(event + pred->offset); | 179 | unsigned short str_loc = *(unsigned short *)(event + pred->offset); |
182 | char *addr = (char *)(event + str_loc); | 180 | char *addr = (char *)(event + str_loc); |
183 | int cmp, match; | 181 | int cmp, match; |
184 | 182 | ||
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) | |||
294 | { | 292 | { |
295 | struct event_filter *filter = call->filter; | 293 | struct event_filter *filter = call->filter; |
296 | 294 | ||
297 | mutex_lock(&filter_mutex); | 295 | mutex_lock(&event_mutex); |
298 | if (filter->filter_string) | 296 | if (filter->filter_string) |
299 | trace_seq_printf(s, "%s\n", filter->filter_string); | 297 | trace_seq_printf(s, "%s\n", filter->filter_string); |
300 | else | 298 | else |
301 | trace_seq_printf(s, "none\n"); | 299 | trace_seq_printf(s, "none\n"); |
302 | mutex_unlock(&filter_mutex); | 300 | mutex_unlock(&event_mutex); |
303 | } | 301 | } |
304 | 302 | ||
305 | void print_subsystem_event_filter(struct event_subsystem *system, | 303 | void print_subsystem_event_filter(struct event_subsystem *system, |
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
307 | { | 305 | { |
308 | struct event_filter *filter = system->filter; | 306 | struct event_filter *filter = system->filter; |
309 | 307 | ||
310 | mutex_lock(&filter_mutex); | 308 | mutex_lock(&event_mutex); |
311 | if (filter->filter_string) | 309 | if (filter->filter_string) |
312 | trace_seq_printf(s, "%s\n", filter->filter_string); | 310 | trace_seq_printf(s, "%s\n", filter->filter_string); |
313 | else | 311 | else |
314 | trace_seq_printf(s, "none\n"); | 312 | trace_seq_printf(s, "none\n"); |
315 | mutex_unlock(&filter_mutex); | 313 | mutex_unlock(&event_mutex); |
316 | } | 314 | } |
317 | 315 | ||
318 | static struct ftrace_event_field * | 316 | static struct ftrace_event_field * |
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call) | |||
381 | filter_free_pred(filter->preds[i]); | 379 | filter_free_pred(filter->preds[i]); |
382 | } | 380 | } |
383 | kfree(filter->preds); | 381 | kfree(filter->preds); |
382 | kfree(filter->filter_string); | ||
384 | kfree(filter); | 383 | kfree(filter); |
385 | call->filter = NULL; | 384 | call->filter = NULL; |
386 | } | 385 | } |
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) | |||
433 | filter->n_preds = 0; | 432 | filter->n_preds = 0; |
434 | } | 433 | } |
435 | 434 | ||
436 | mutex_lock(&event_mutex); | ||
437 | list_for_each_entry(call, &ftrace_events, list) { | 435 | list_for_each_entry(call, &ftrace_events, list) { |
438 | if (!call->define_fields) | 436 | if (!call->define_fields) |
439 | continue; | 437 | continue; |
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) | |||
443 | remove_filter_string(call->filter); | 441 | remove_filter_string(call->filter); |
444 | } | 442 | } |
445 | } | 443 | } |
446 | mutex_unlock(&event_mutex); | ||
447 | } | 444 | } |
448 | 445 | ||
449 | static int filter_add_pred_fn(struct filter_parse_state *ps, | 446 | static int filter_add_pred_fn(struct filter_parse_state *ps, |
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
546 | filter_pred_fn_t fn; | 543 | filter_pred_fn_t fn; |
547 | unsigned long long val; | 544 | unsigned long long val; |
548 | int string_type; | 545 | int string_type; |
546 | int ret; | ||
549 | 547 | ||
550 | pred->fn = filter_pred_none; | 548 | pred->fn = filter_pred_none; |
551 | 549 | ||
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
581 | pred->not = 1; | 579 | pred->not = 1; |
582 | return filter_add_pred_fn(ps, call, pred, fn); | 580 | return filter_add_pred_fn(ps, call, pred, fn); |
583 | } else { | 581 | } else { |
584 | if (strict_strtoull(pred->str_val, 0, &val)) { | 582 | if (field->is_signed) |
583 | ret = strict_strtoll(pred->str_val, 0, &val); | ||
584 | else | ||
585 | ret = strict_strtoull(pred->str_val, 0, &val); | ||
586 | if (ret) { | ||
585 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); | 587 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); |
586 | return -EINVAL; | 588 | return -EINVAL; |
587 | } | 589 | } |
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, | |||
625 | filter->preds[filter->n_preds] = pred; | 627 | filter->preds[filter->n_preds] = pred; |
626 | filter->n_preds++; | 628 | filter->n_preds++; |
627 | 629 | ||
628 | mutex_lock(&event_mutex); | ||
629 | list_for_each_entry(call, &ftrace_events, list) { | 630 | list_for_each_entry(call, &ftrace_events, list) { |
630 | 631 | ||
631 | if (!call->define_fields) | 632 | if (!call->define_fields) |
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, | |||
636 | 637 | ||
637 | err = filter_add_pred(ps, call, pred); | 638 | err = filter_add_pred(ps, call, pred); |
638 | if (err) { | 639 | if (err) { |
639 | mutex_unlock(&event_mutex); | ||
640 | filter_free_subsystem_preds(system); | 640 | filter_free_subsystem_preds(system); |
641 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | 641 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); |
642 | goto out; | 642 | goto out; |
643 | } | 643 | } |
644 | replace_filter_string(call->filter, filter_string); | 644 | replace_filter_string(call->filter, filter_string); |
645 | } | 645 | } |
646 | mutex_unlock(&event_mutex); | ||
647 | out: | 646 | out: |
648 | return err; | 647 | return err; |
649 | } | 648 | } |
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | |||
1070 | 1069 | ||
1071 | struct filter_parse_state *ps; | 1070 | struct filter_parse_state *ps; |
1072 | 1071 | ||
1073 | mutex_lock(&filter_mutex); | 1072 | mutex_lock(&event_mutex); |
1074 | 1073 | ||
1075 | if (!strcmp(strstrip(filter_string), "0")) { | 1074 | if (!strcmp(strstrip(filter_string), "0")) { |
1076 | filter_disable_preds(call); | 1075 | filter_disable_preds(call); |
1077 | remove_filter_string(call->filter); | 1076 | remove_filter_string(call->filter); |
1078 | mutex_unlock(&filter_mutex); | 1077 | mutex_unlock(&event_mutex); |
1079 | return 0; | 1078 | return 0; |
1080 | } | 1079 | } |
1081 | 1080 | ||
@@ -1103,7 +1102,7 @@ out: | |||
1103 | postfix_clear(ps); | 1102 | postfix_clear(ps); |
1104 | kfree(ps); | 1103 | kfree(ps); |
1105 | out_unlock: | 1104 | out_unlock: |
1106 | mutex_unlock(&filter_mutex); | 1105 | mutex_unlock(&event_mutex); |
1107 | 1106 | ||
1108 | return err; | 1107 | return err; |
1109 | } | 1108 | } |
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1115 | 1114 | ||
1116 | struct filter_parse_state *ps; | 1115 | struct filter_parse_state *ps; |
1117 | 1116 | ||
1118 | mutex_lock(&filter_mutex); | 1117 | mutex_lock(&event_mutex); |
1119 | 1118 | ||
1120 | if (!strcmp(strstrip(filter_string), "0")) { | 1119 | if (!strcmp(strstrip(filter_string), "0")) { |
1121 | filter_free_subsystem_preds(system); | 1120 | filter_free_subsystem_preds(system); |
1122 | remove_filter_string(system->filter); | 1121 | remove_filter_string(system->filter); |
1123 | mutex_unlock(&filter_mutex); | 1122 | mutex_unlock(&event_mutex); |
1124 | return 0; | 1123 | return 0; |
1125 | } | 1124 | } |
1126 | 1125 | ||
@@ -1148,7 +1147,7 @@ out: | |||
1148 | postfix_clear(ps); | 1147 | postfix_clear(ps); |
1149 | kfree(ps); | 1148 | kfree(ps); |
1150 | out_unlock: | 1149 | out_unlock: |
1151 | mutex_unlock(&filter_mutex); | 1150 | mutex_unlock(&event_mutex); |
1152 | 1151 | ||
1153 | return err; | 1152 | return err; |
1154 | } | 1153 | } |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7df44ff..90f134764837 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void) | |||
193 | static void tracing_stop_function_trace(void) | 193 | static void tracing_stop_function_trace(void) |
194 | { | 194 | { |
195 | ftrace_function_enabled = 0; | 195 | ftrace_function_enabled = 0; |
196 | /* OK if they are not registered */ | 196 | |
197 | unregister_ftrace_function(&trace_stack_ops); | 197 | if (func_flags.val & TRACE_FUNC_OPT_STACK) |
198 | unregister_ftrace_function(&trace_ops); | 198 | unregister_ftrace_function(&trace_stack_ops); |
199 | else | ||
200 | unregister_ftrace_function(&trace_ops); | ||
199 | } | 201 | } |
200 | 202 | ||
201 | static int func_set_flag(u32 old_flags, u32 bit, int set) | 203 | static int func_set_flag(u32 old_flags, u32 bit, int set) |
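
The trace_functions.c hunk makes the stop path mirror the start path: only the callback that was actually registered, chosen by the stack-trace option bit, is unregistered. Sketched with placeholder ops:

    /*
     * Stop mirrors start: unregister exactly the ops that were
     * registered, chosen by the stack-trace option bit.  Placeholder
     * ops and flag, illustration only.
     */
    #include <stdio.h>

    #define TRACE_FUNC_OPT_STACK 0x1
    static unsigned int func_flags;

    static void register_ops(const char *ops)   { printf("register %s\n", ops); }
    static void unregister_ops(const char *ops) { printf("unregister %s\n", ops); }

    static void start_function_trace(void)
    {
            if (func_flags & TRACE_FUNC_OPT_STACK)
                    register_ops("trace_stack_ops");
            else
                    register_ops("trace_ops");
    }

    static void stop_function_trace(void)
    {
            if (func_flags & TRACE_FUNC_OPT_STACK)
                    unregister_ops("trace_stack_ops");
            else
                    unregister_ops("trace_ops");
    }

    int main(void)
    {
            func_flags = TRACE_FUNC_OPT_STACK;
            start_function_trace();
            stop_function_trace();
            return 0;
    }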
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b2..d2249abafb53 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = { | |||
57 | 57 | ||
58 | /* Add a function return address to the trace stack on thread info.*/ | 58 | /* Add a function return address to the trace stack on thread info.*/ |
59 | int | 59 | int |
60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | 60 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, |
61 | unsigned long frame_pointer) | ||
61 | { | 62 | { |
62 | unsigned long long calltime; | 63 | unsigned long long calltime; |
63 | int index; | 64 | int index; |
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | |||
85 | current->ret_stack[index].func = func; | 86 | current->ret_stack[index].func = func; |
86 | current->ret_stack[index].calltime = calltime; | 87 | current->ret_stack[index].calltime = calltime; |
87 | current->ret_stack[index].subtime = 0; | 88 | current->ret_stack[index].subtime = 0; |
89 | current->ret_stack[index].fp = frame_pointer; | ||
88 | *depth = index; | 90 | *depth = index; |
89 | 91 | ||
90 | return 0; | 92 | return 0; |
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) | |||
92 | 94 | ||
93 | /* Retrieve a function return address to the trace stack on thread info.*/ | 95 | /* Retrieve a function return address to the trace stack on thread info.*/ |
94 | static void | 96 | static void |
95 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | 97 | ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, |
98 | unsigned long frame_pointer) | ||
96 | { | 99 | { |
97 | int index; | 100 | int index; |
98 | 101 | ||
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | |||
106 | return; | 109 | return; |
107 | } | 110 | } |
108 | 111 | ||
112 | #ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST | ||
113 | /* | ||
114 | * The arch may choose to record the frame pointer used | ||
115 | * and check it here to make sure that it is what we expect it | ||
116 | * to be. If gcc does not set the place holder of the return | ||
117 | * address in the frame pointer, and does a copy instead, then | ||
118 | * the function graph trace will fail. This test detects this | ||
119 | * case. | ||
120 | * | ||
121 | * Currently, x86_32 with optimize for size (-Os) makes the latest | ||
122 | * gcc do the above. | ||
123 | */ | ||
124 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { | ||
125 | ftrace_graph_stop(); | ||
126 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" | ||
127 | " from func %pF return to %lx\n", | ||
128 | current->ret_stack[index].fp, | ||
129 | frame_pointer, | ||
130 | (void *)current->ret_stack[index].func, | ||
131 | current->ret_stack[index].ret); | ||
132 | *ret = (unsigned long)panic; | ||
133 | return; | ||
134 | } | ||
135 | #endif | ||
136 | |||
109 | *ret = current->ret_stack[index].ret; | 137 | *ret = current->ret_stack[index].ret; |
110 | trace->func = current->ret_stack[index].func; | 138 | trace->func = current->ret_stack[index].func; |
111 | trace->calltime = current->ret_stack[index].calltime; | 139 | trace->calltime = current->ret_stack[index].calltime; |
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | |||
117 | * Send the trace to the ring-buffer. | 145 | * Send the trace to the ring-buffer. |
118 | * @return the original return address. | 146 | * @return the original return address. |
119 | */ | 147 | */ |
120 | unsigned long ftrace_return_to_handler(void) | 148 | unsigned long ftrace_return_to_handler(unsigned long frame_pointer) |
121 | { | 149 | { |
122 | struct ftrace_graph_ret trace; | 150 | struct ftrace_graph_ret trace; |
123 | unsigned long ret; | 151 | unsigned long ret; |
124 | 152 | ||
125 | ftrace_pop_return_trace(&trace, &ret); | 153 | ftrace_pop_return_trace(&trace, &ret, frame_pointer); |
126 | trace.rettime = trace_clock_local(); | 154 | trace.rettime = trace_clock_local(); |
127 | ftrace_graph_return(&trace); | 155 | ftrace_graph_return(&trace); |
128 | barrier(); | 156 | barrier(); |
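
The frame-pointer test added here records the frame pointer when a return address is hijacked and compares it when the function returns; on a mismatch the graph tracer is stopped, because the saved return addresses can no longer be trusted (x86_32 built with -Os is the known offender, hence the Kconfig dependency above). The idea, reduced to a stand-alone shadow stack with the kernel details omitted:

    /*
     * The frame-pointer test as a stand-alone shadow stack: record the
     * frame pointer at entry and verify it at return; on a mismatch,
     * bail out (the kernel calls ftrace_graph_stop() and WARNs).
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct ret_entry {
            unsigned long ret;   /* original return address */
            unsigned long fp;    /* frame pointer at entry   */
    };

    static struct ret_entry ret_stack[64];
    static int depth = -1;

    static int push_return(unsigned long ret, unsigned long fp)
    {
            if (depth + 1 >= 64)
                    return -1;               /* stack overrun */
            depth++;
            ret_stack[depth].ret = ret;
            ret_stack[depth].fp = fp;
            return 0;
    }

    static unsigned long pop_return(unsigned long fp)
    {
            struct ret_entry *e = &ret_stack[depth--];

            if (e->fp != fp) {
                    fprintf(stderr,
                            "Bad frame pointer: expected %lx, received %lx\n",
                            e->fp, fp);
                    exit(1);
            }
            return e->ret;
    }

    int main(void)
    {
            push_return(0x1234, 0xbeef);
            printf("return to %lx\n", pop_return(0xbeef));
            return 0;
    }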