diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 104 |
1 files changed, 62 insertions, 42 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 16b24d49604c..7102d7a2fadb 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -370,6 +370,9 @@ static inline int test_time_stamp(u64 delta) | |||
370 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 370 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
371 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 371 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
372 | 372 | ||
373 | /* Max number of timestamps that can fit on a page */ | ||
374 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) | ||
375 | |||
373 | int ring_buffer_print_page_header(struct trace_seq *s) | 376 | int ring_buffer_print_page_header(struct trace_seq *s) |
374 | { | 377 | { |
375 | struct buffer_data_page field; | 378 | struct buffer_data_page field; |
@@ -1335,6 +1338,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1335 | return event; | 1338 | return event; |
1336 | } | 1339 | } |
1337 | 1340 | ||
1341 | static inline int | ||
1342 | rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | ||
1343 | struct ring_buffer_event *event) | ||
1344 | { | ||
1345 | unsigned long new_index, old_index; | ||
1346 | struct buffer_page *bpage; | ||
1347 | unsigned long index; | ||
1348 | unsigned long addr; | ||
1349 | |||
1350 | new_index = rb_event_index(event); | ||
1351 | old_index = new_index + rb_event_length(event); | ||
1352 | addr = (unsigned long)event; | ||
1353 | addr &= PAGE_MASK; | ||
1354 | |||
1355 | bpage = cpu_buffer->tail_page; | ||
1356 | |||
1357 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | ||
1358 | /* | ||
1359 | * This is on the tail page. It is possible that | ||
1360 | * a write could come in and move the tail page | ||
1361 | * and write to the next page. That is fine | ||
1362 | * because we just shorten what is on this page. | ||
1363 | */ | ||
1364 | index = local_cmpxchg(&bpage->write, old_index, new_index); | ||
1365 | if (index == old_index) | ||
1366 | return 1; | ||
1367 | } | ||
1368 | |||
1369 | /* could not discard */ | ||
1370 | return 0; | ||
1371 | } | ||
1372 | |||
1338 | static int | 1373 | static int |
1339 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 1374 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
1340 | u64 *ts, u64 *delta) | 1375 | u64 *ts, u64 *delta) |
@@ -1377,17 +1412,24 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
1377 | event->array[0] = *delta >> TS_SHIFT; | 1412 | event->array[0] = *delta >> TS_SHIFT; |
1378 | } else { | 1413 | } else { |
1379 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1414 | cpu_buffer->commit_page->page->time_stamp = *ts; |
1380 | event->time_delta = 0; | 1415 | /* try to discard, since we do not need this */ |
1381 | event->array[0] = 0; | 1416 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1417 | /* nope, just zero it */ | ||
1418 | event->time_delta = 0; | ||
1419 | event->array[0] = 0; | ||
1420 | } | ||
1382 | } | 1421 | } |
1383 | cpu_buffer->write_stamp = *ts; | 1422 | cpu_buffer->write_stamp = *ts; |
1384 | /* let the caller know this was the commit */ | 1423 | /* let the caller know this was the commit */ |
1385 | ret = 1; | 1424 | ret = 1; |
1386 | } else { | 1425 | } else { |
1387 | /* Darn, this is just wasted space */ | 1426 | /* Try to discard the event */ |
1388 | event->time_delta = 0; | 1427 | if (!rb_try_to_discard(cpu_buffer, event)) { |
1389 | event->array[0] = 0; | 1428 | /* Darn, this is just wasted space */ |
1390 | ret = 0; | 1429 | event->time_delta = 0; |
1430 | event->array[0] = 0; | ||
1431 | ret = 0; | ||
1432 | } | ||
1391 | } | 1433 | } |
1392 | 1434 | ||
1393 | *delta = 0; | 1435 | *delta = 0; |
@@ -1682,10 +1724,6 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1682 | struct ring_buffer_event *event) | 1724 | struct ring_buffer_event *event) |
1683 | { | 1725 | { |
1684 | struct ring_buffer_per_cpu *cpu_buffer; | 1726 | struct ring_buffer_per_cpu *cpu_buffer; |
1685 | unsigned long new_index, old_index; | ||
1686 | struct buffer_page *bpage; | ||
1687 | unsigned long index; | ||
1688 | unsigned long addr; | ||
1689 | int cpu; | 1727 | int cpu; |
1690 | 1728 | ||
1691 | /* The event is discarded regardless */ | 1729 | /* The event is discarded regardless */ |
@@ -1701,24 +1739,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, | |||
1701 | cpu = smp_processor_id(); | 1739 | cpu = smp_processor_id(); |
1702 | cpu_buffer = buffer->buffers[cpu]; | 1740 | cpu_buffer = buffer->buffers[cpu]; |
1703 | 1741 | ||
1704 | new_index = rb_event_index(event); | 1742 | if (!rb_try_to_discard(cpu_buffer, event)) |
1705 | old_index = new_index + rb_event_length(event); | 1743 | goto out; |
1706 | addr = (unsigned long)event; | ||
1707 | addr &= PAGE_MASK; | ||
1708 | |||
1709 | bpage = cpu_buffer->tail_page; | ||
1710 | |||
1711 | if (bpage == (void *)addr && rb_page_write(bpage) == old_index) { | ||
1712 | /* | ||
1713 | * This is on the tail page. It is possible that | ||
1714 | * a write could come in and move the tail page | ||
1715 | * and write to the next page. That is fine | ||
1716 | * because we just shorten what is on this page. | ||
1717 | */ | ||
1718 | index = local_cmpxchg(&bpage->write, old_index, new_index); | ||
1719 | if (index == old_index) | ||
1720 | goto out; | ||
1721 | } | ||
1722 | 1744 | ||
1723 | /* | 1745 | /* |
1724 | * The commit is still visible by the reader, so we | 1746 | * The commit is still visible by the reader, so we |
@@ -2253,8 +2275,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2253 | * Check if we are at the end of the buffer. | 2275 | * Check if we are at the end of the buffer. |
2254 | */ | 2276 | */ |
2255 | if (iter->head >= rb_page_size(iter->head_page)) { | 2277 | if (iter->head >= rb_page_size(iter->head_page)) { |
2256 | if (RB_WARN_ON(buffer, | 2278 | /* discarded commits can make the page empty */ |
2257 | iter->head_page == cpu_buffer->commit_page)) | 2279 | if (iter->head_page == cpu_buffer->commit_page) |
2258 | return; | 2280 | return; |
2259 | rb_inc_iter(iter); | 2281 | rb_inc_iter(iter); |
2260 | return; | 2282 | return; |
@@ -2297,12 +2319,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
2297 | /* | 2319 | /* |
2298 | * We repeat when a timestamp is encountered. It is possible | 2320 | * We repeat when a timestamp is encountered. It is possible |
2299 | * to get multiple timestamps from an interrupt entering just | 2321 | * to get multiple timestamps from an interrupt entering just |
2300 | * as one timestamp is about to be written. The max times | 2322 | * as one timestamp is about to be written, or from discarded |
2301 | * that this can happen is the number of nested interrupts we | 2323 | * commits. The most that we can have is the number on a single page. |
2302 | * can have. Nesting 10 deep of interrupts is clearly | ||
2303 | * an anomaly. | ||
2304 | */ | 2324 | */ |
2305 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2325 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2306 | return NULL; | 2326 | return NULL; |
2307 | 2327 | ||
2308 | reader = rb_get_reader_page(cpu_buffer); | 2328 | reader = rb_get_reader_page(cpu_buffer); |
@@ -2368,14 +2388,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
2368 | 2388 | ||
2369 | again: | 2389 | again: |
2370 | /* | 2390 | /* |
2371 | * We repeat when a timestamp is encountered. It is possible | 2391 | * We repeat when a timestamp is encountered. |
2372 | * to get multiple timestamps from an interrupt entering just | 2392 | * We can get multiple timestamps by nested interrupts or also |
2373 | * as one timestamp is about to be written. The max times | 2393 | * if filtering is on (discarding commits). Since discarding |
2374 | * that this can happen is the number of nested interrupts we | 2394 | * commits can be frequent we can get a lot of timestamps. |
2375 | * can have. Nesting 10 deep of interrupts is clearly | 2395 | * But we limit them by not adding timestamps if they begin |
2376 | * an anomaly. | 2396 | * at the start of a page. |
2377 | */ | 2397 | */ |
2378 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2398 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
2379 | return NULL; | 2399 | return NULL; |
2380 | 2400 | ||
2381 | if (rb_per_cpu_empty(cpu_buffer)) | 2401 | if (rb_per_cpu_empty(cpu_buffer)) |