author     Steven Rostedt (Red Hat) <rostedt@goodmis.org>   2015-05-29 09:40:18 -0400
committer  Steven Rostedt <rostedt@goodmis.org>             2015-07-20 22:30:48 -0400
commit     a4543a2fa9ef31d6d0f854a4e14f8f82e7996d8d (patch)
tree       20ce0568e66867059279578471fe574d18c21cad /kernel
parent     9826b2733a4399149072058a11f611357479229d (diff)
ring-buffer: Get timestamp after event is allocated
Move the capturing of the timestamp to after an event is allocated.
If the event is not a commit (that is, a nested event that preempted
another event), then no timestamp is needed, because the delta of a
nested event is always zero.
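
In the reworked __rb_reserve_next() (full hunk in the diff below), that
decision condenses to reading the clock only after the event has been
reserved, and giving a real delta only to commit events. A condensed
excerpt, not verbatim:

	/* Timestamp is read only once the event is already reserved */
	info->ts = rb_time_stamp(cpu_buffer->buffer);
	is_commit = rb_event_is_commit(cpu_buffer, event);

	/* Commits are special (non nested events) */
	info->delta = is_commit ? info->ts - cpu_buffer->write_stamp : 0;
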
If the event starts on a new page, no delta needs to be calculated,
because the full timestamp is added to the page header and the event
is given a delta of zero.
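
Condensed from the same hunk: a zero tail offset means the event is the
first one on the page, so the page header absorbs the full timestamp:

	if (!tail) {
		/* First event on the page: stamp the page itself ... */
		tail_page->page->time_stamp = info->ts;
		/* ... and the event itself carries no delta. */
		info->delta = 0;
	}
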
If the event requires a time extend (the delta does not fit in the
27-bit delta slot in the header), then the event is discarded, the
length is extended to hold a TIME_EXTEND event that allows for a
59-bit delta, and the commit is tried again.
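
That is the first half of the new rb_handle_timestamp(), roughly
(condensed from the hunk below): a successful discard grows the
reservation length, and the caller sees -EAGAIN and loops:

	/* The event's length must be filled in before it can be discarded */
	rb_update_event(cpu_buffer, event, info);

	if (rb_try_to_discard(cpu_buffer, event)) {
		info->add_timestamp = 1;
		/* Reserve extra room so the retry can prepend a
		 * TIME_EXTEND event ahead of the data event. */
		info->length += RB_LEN_TIME_EXTEND;
		return;	/* __rb_reserve_next() then returns ERR_PTR(-EAGAIN) */
	}
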
If the event can't be discarded (another event came in after it), then
the TIME_EXTEND is added directly to the allocated event and the rest
of the event is turned into padding.
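
And the second half, condensed: the TIME_EXTEND is written where the
event already sits, and whatever room remains becomes a padding event
that readers skip (the commit bookkeeping around the padding is
omitted here):

	size = info->length - RB_LEN_TIME_EXTEND;

	/* The padding will have a delta of 1 */
	if (size)
		info->delta--;

	padding = rb_add_time_stamp(event, info->delta);
	if (size) {
		info->length = size;
		rb_update_event(cpu_buffer, padding, info);
		rb_event_discard(padding);
	}
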
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/trace/ring_buffer.c | 161
1 file changed, 114 insertions(+), 47 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a78d4ee4bc58..b5ed553e0a45 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2009,7 +2009,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
  * and with this, we can determine what to place into the
  * data field.
  */
-static void
+static void __always_inline
 rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 		struct ring_buffer_event *event,
 		struct rb_event_info *info)
@@ -2017,10 +2017,6 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 	unsigned length = info->length;
 	u64 delta = info->delta;
 
-	/* Only a commit updates the timestamp */
-	if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
-		delta = 0;
-
 	/*
 	 * If we need to add a timestamp, then we
 	 * add it to the start of the resevered space.
@@ -2286,6 +2282,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	local_sub(length, &tail_page->write);
 }
 
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
+
 /*
  * This is the slow path, force gcc not to inline it.
  */
@@ -2300,6 +2298,16 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	int ret;
 	u64 ts;
 
+	/*
+	 * If the event had a timestamp attached to it, remove it.
+	 * The first event on a page (nested or not) always uses
+	 * the full timestamp of the new page.
+	 */
+	if (info->add_timestamp) {
+		info->add_timestamp = 0;
+		info->length -= RB_LEN_TIME_EXTEND;
+	}
+
 	next_page = tail_page;
 
 	rb_inc_page(cpu_buffer, &next_page);
@@ -2386,6 +2394,11 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 	rb_reset_tail(cpu_buffer, tail, info);
 
+	/* Commit what we have for now to update timestamps */
+	rb_end_commit(cpu_buffer);
+	/* rb_end_commit() decs committing */
+	local_inc(&cpu_buffer->committing);
+
 	/* fail and let the caller try again */
 	return ERR_PTR(-EAGAIN);
 
@@ -2403,10 +2416,23 @@ static inline bool sched_clock_stable(void)
 }
 #endif
 
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event);
+static inline void rb_event_discard(struct ring_buffer_event *event);
+static void
+rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+		      struct ring_buffer_event *event);
+
 static noinline void
 rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
+		    struct ring_buffer_event *event,
 		    struct rb_event_info *info)
 {
+	struct ring_buffer_event *padding;
+	int length;
+	int size;
+
 	WARN_ONCE(info->delta > (1ULL << 59),
 		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
 		  (unsigned long long)info->delta,
@@ -2416,7 +2442,61 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  "If you just came from a suspend/resume,\n"
 		  "please switch to the trace global clock:\n"
 		  " echo global > /sys/kernel/debug/tracing/trace_clock\n");
-	info->add_timestamp = 1;
+
+	/*
+	 * Discarding this event to add a timestamp in front, but
+	 * we still need to update the length of it to perform the discard.
+	 */
+	rb_update_event(cpu_buffer, event, info);
+
+	if (rb_try_to_discard(cpu_buffer, event)) {
+		info->add_timestamp = 1;
+		/*
+		 * The time delta since the last event is too big to
+		 * hold in the time field of the event, then we append a
+		 * TIME EXTEND event ahead of the data event.
+		 */
+		info->length += RB_LEN_TIME_EXTEND;
+		return;
+	}
+
+	/*
+	 * Humpf! An event came in after this one, and because it is not a
+	 * commit, it will have a delta of zero, thus, it will take on
+	 * the timestamp of the previous commit, which happened a long time
+	 * ago (we need to add a timestamp, remember?).
+	 * We need to add the timestamp here. A timestamp is a fixed size
+	 * of 8 bytes. That means the rest of the event needs to be
+	 * padding.
+	 */
+	size = info->length - RB_LEN_TIME_EXTEND;
+
+	/* The padding will have a delta of 1 */
+	if (size)
+		info->delta--;
+
+	padding = rb_add_time_stamp(event, info->delta);
+
+	if (size) {
+		length = info->length;
+		info->delta = 0;
+		info->length = size;
+		rb_update_event(cpu_buffer, padding, info);
+
+		rb_event_discard(padding);
+
+		/* Still visible, need to update write_stamp */
+		rb_update_write_stamp(cpu_buffer, event);
+
+		/* Still need to commit the padding. */
+		rb_end_commit(cpu_buffer);
+
+		/* rb_end_commit() decs committing */
+		local_inc(&cpu_buffer->committing);
+
+		/* The next iteration still uses the original length */
+		info->length = length;
+	}
 }
 
 static struct ring_buffer_event *
@@ -2426,14 +2506,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer_event *event;
 	struct buffer_page *tail_page;
 	unsigned long tail, write;
-
-	/*
-	 * If the time delta since the last event is too big to
-	 * hold in the time field of the event, then we append a
-	 * TIME EXTEND event ahead of the data event.
-	 */
-	if (unlikely(info->add_timestamp))
-		info->length += RB_LEN_TIME_EXTEND;
+	bool is_commit;
 
 	tail_page = info->tail_page = cpu_buffer->tail_page;
 	write = local_add_return(info->length, &tail_page->write);
@@ -2442,31 +2515,42 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	write &= RB_WRITE_MASK;
 	tail = write - info->length;
 
-	/*
-	 * If this is the first commit on the page, then it has the same
-	 * timestamp as the page itself.
-	 */
-	if (!tail)
-		info->delta = 0;
-
 	/* See if we shot pass the end of this buffer page */
 	if (unlikely(write > BUF_PAGE_SIZE))
 		return rb_move_tail(cpu_buffer, tail, info);
 
 	/* We reserved something on the buffer */
-
 	event = __rb_page_index(tail_page, tail);
-	kmemcheck_annotate_bitfield(event, bitfield);
-	rb_update_event(cpu_buffer, event, info);
-
-	local_inc(&tail_page->entries);
 
 	/*
-	 * If this is the first commit on the page, then update
-	 * its timestamp.
+	 * If this is the first commit on the page, then it has the same
+	 * timestamp as the page itself, otherwise we need to figure out
+	 * the delta.
 	 */
-	if (!tail)
+	info->ts = rb_time_stamp(cpu_buffer->buffer);
+	is_commit = rb_event_is_commit(cpu_buffer, event);
+
+	/* Commits are special (non nested events) */
+	info->delta = is_commit ? info->ts - cpu_buffer->write_stamp : 0;
+
+	if (!tail) {
+		/*
+		 * If this is the first commit on the page, set the
+		 * page to its timestamp.
+		 */
 		tail_page->page->time_stamp = info->ts;
+		info->delta = 0;
+
+	} else if (unlikely(test_time_stamp(info->delta)) &&
+		   !info->add_timestamp) {
+		rb_handle_timestamp(cpu_buffer, event, info);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	kmemcheck_annotate_bitfield(event, bitfield);
+	rb_update_event(cpu_buffer, event, info);
+
+	local_inc(&tail_page->entries);
 
 	/* account for these added bytes */
 	local_add(info->length, &cpu_buffer->entries_bytes);
@@ -2560,7 +2644,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct rb_event_info info;
 	int nr_loops = 0;
-	u64 diff;
 
 	rb_start_commit(cpu_buffer);
 
@@ -2578,12 +2661,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 		return NULL;
 	}
 #endif
-
 	info.length = rb_calculate_event_length(length);
- again:
 	info.add_timestamp = 0;
-	info.delta = 0;
-
+ again:
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
 	 * If one does, it will cause this original code to loop
@@ -2596,19 +2676,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		goto out_fail;
 
-	info.ts = rb_time_stamp(cpu_buffer->buffer);
-	diff = info.ts - cpu_buffer->write_stamp;
-
-	/* make sure this diff is calculated here */
-	barrier();
-
-	/* Did the write stamp get updated already? */
-	if (likely(info.ts >= cpu_buffer->write_stamp)) {
-		info.delta = diff;
-		if (unlikely(test_time_stamp(info.delta)))
-			rb_handle_timestamp(cpu_buffer, &info);
-	}
-
 	event = __rb_reserve_next(cpu_buffer, &info);
 
 	if (unlikely(PTR_ERR(event) == -EAGAIN))
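
Taken together, the reserve/retry loop now reads roughly as below (a
condensed sketch of rb_reserve_next_event() after this patch; the
nr_loops guard, commit start/end, and failure paths are omitted).
Note that info.add_timestamp is initialized before the again: label,
so a value set by rb_handle_timestamp() survives into the retry:

	info.length = rb_calculate_event_length(length);
	info.add_timestamp = 0;
 again:
	event = __rb_reserve_next(cpu_buffer, &info);

	/* -EAGAIN means either the tail page filled up (rb_move_tail())
	 * or a TIME_EXTEND is needed (rb_handle_timestamp() grew
	 * info.length); in both cases the reservation is simply retried. */
	if (unlikely(PTR_ERR(event) == -EAGAIN))
		goto again;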