aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2015-05-29 09:40:18 -0400
committerSteven Rostedt <rostedt@goodmis.org>2015-07-20 22:30:48 -0400
commita4543a2fa9ef31d6d0f854a4e14f8f82e7996d8d (patch)
tree20ce0568e66867059279578471fe574d18c21cad /kernel
parent9826b2733a4399149072058a11f611357479229d (diff)
ring-buffer: Get timestamp after event is allocated
Move the capturing of the timestamp to after an event is allocated. If the event is not a commit (i.e., it is an event that preempted another event), then no timestamp is needed, because the delta of nested events is always zero. If the event starts on a new page, no delta needs to be calculated, as the full timestamp will be added to the page header, and the event will have a delta of zero. Now, if the event requires a time extend (the delta does not fit in the 27-bit delta slot in the header), then the event is discarded, the length is extended to hold the TIME_EXTEND event that allows for a 59-bit delta, and the commit is tried again. If the event can't be discarded (another event came in after it), then the TIME_EXTEND is added directly to the allocated event and the rest of the event is given padding. Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/ring_buffer.c161
1 file changed, 114 insertions, 47 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a78d4ee4bc58..b5ed553e0a45 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2009,7 +2009,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
2009 * and with this, we can determine what to place into the 2009 * and with this, we can determine what to place into the
2010 * data field. 2010 * data field.
2011 */ 2011 */
2012static void 2012static void __always_inline
2013rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, 2013rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2014 struct ring_buffer_event *event, 2014 struct ring_buffer_event *event,
2015 struct rb_event_info *info) 2015 struct rb_event_info *info)
@@ -2017,10 +2017,6 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2017 unsigned length = info->length; 2017 unsigned length = info->length;
2018 u64 delta = info->delta; 2018 u64 delta = info->delta;
2019 2019
2020 /* Only a commit updates the timestamp */
2021 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2022 delta = 0;
2023
2024 /* 2020 /*
2025 * If we need to add a timestamp, then we 2021 * If we need to add a timestamp, then we
2026 * add it to the start of the resevered space. 2022 * add it to the start of the resevered space.
@@ -2286,6 +2282,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2286 local_sub(length, &tail_page->write); 2282 local_sub(length, &tail_page->write);
2287} 2283}
2288 2284
2285static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2286
2289/* 2287/*
2290 * This is the slow path, force gcc not to inline it. 2288 * This is the slow path, force gcc not to inline it.
2291 */ 2289 */
@@ -2300,6 +2298,16 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2300 int ret; 2298 int ret;
2301 u64 ts; 2299 u64 ts;
2302 2300
2301 /*
2302 * If the event had a timestamp attached to it, remove it.
2303 * The first event on a page (nested or not) always uses
2304 * the full timestamp of the new page.
2305 */
2306 if (info->add_timestamp) {
2307 info->add_timestamp = 0;
2308 info->length -= RB_LEN_TIME_EXTEND;
2309 }
2310
2303 next_page = tail_page; 2311 next_page = tail_page;
2304 2312
2305 rb_inc_page(cpu_buffer, &next_page); 2313 rb_inc_page(cpu_buffer, &next_page);
@@ -2386,6 +2394,11 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2386 2394
2387 rb_reset_tail(cpu_buffer, tail, info); 2395 rb_reset_tail(cpu_buffer, tail, info);
2388 2396
2397 /* Commit what we have for now to update timestamps */
2398 rb_end_commit(cpu_buffer);
2399 /* rb_end_commit() decs committing */
2400 local_inc(&cpu_buffer->committing);
2401
2389 /* fail and let the caller try again */ 2402 /* fail and let the caller try again */
2390 return ERR_PTR(-EAGAIN); 2403 return ERR_PTR(-EAGAIN);
2391 2404
@@ -2403,10 +2416,23 @@ static inline bool sched_clock_stable(void)
2403} 2416}
2404#endif 2417#endif
2405 2418
2419static inline int
2420rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2421 struct ring_buffer_event *event);
2422static inline void rb_event_discard(struct ring_buffer_event *event);
2423static void
2424rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2425 struct ring_buffer_event *event);
2426
2406static noinline void 2427static noinline void
2407rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, 2428rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2429 struct ring_buffer_event *event,
2408 struct rb_event_info *info) 2430 struct rb_event_info *info)
2409{ 2431{
2432 struct ring_buffer_event *padding;
2433 int length;
2434 int size;
2435
2410 WARN_ONCE(info->delta > (1ULL << 59), 2436 WARN_ONCE(info->delta > (1ULL << 59),
2411 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", 2437 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2412 (unsigned long long)info->delta, 2438 (unsigned long long)info->delta,
@@ -2416,7 +2442,61 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2416 "If you just came from a suspend/resume,\n" 2442 "If you just came from a suspend/resume,\n"
2417 "please switch to the trace global clock:\n" 2443 "please switch to the trace global clock:\n"
2418 " echo global > /sys/kernel/debug/tracing/trace_clock\n"); 2444 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2419 info->add_timestamp = 1; 2445
2446 /*
2447 * Discarding this event to add a timestamp in front, but
2448 * we still need to update the length of it to perform the discard.
2449 */
2450 rb_update_event(cpu_buffer, event, info);
2451
2452 if (rb_try_to_discard(cpu_buffer, event)) {
2453 info->add_timestamp = 1;
2454 /*
2455 * The time delta since the last event is too big to
2456 * hold in the time field of the event, then we append a
2457 * TIME EXTEND event ahead of the data event.
2458 */
2459 info->length += RB_LEN_TIME_EXTEND;
2460 return;
2461 }
2462
2463 /*
2464 * Humpf! An event came in after this one, and because it is not a
2465 * commit, it will have a delta of zero, thus, it will take on
2466 * the timestamp of the previous commit, which happened a long time
2467 * ago (we need to add a timestamp, remember?).
2468 * We need to add the timestamp here. A timestamp is a fixed size
2469 * of 8 bytes. That means the rest of the event needs to be
2470 * padding.
2471 */
2472 size = info->length - RB_LEN_TIME_EXTEND;
2473
2474 /* The padding will have a delta of 1 */
2475 if (size)
2476 info->delta--;
2477
2478 padding = rb_add_time_stamp(event, info->delta);
2479
2480 if (size) {
2481 length = info->length;
2482 info->delta = 0;
2483 info->length = size;
2484 rb_update_event(cpu_buffer, padding, info);
2485
2486 rb_event_discard(padding);
2487
2488 /* Still visible, need to update write_stamp */
2489 rb_update_write_stamp(cpu_buffer, event);
2490
2491 /* Still need to commit the padding. */
2492 rb_end_commit(cpu_buffer);
2493
2494 /* rb_end_commit() decs committing */
2495 local_inc(&cpu_buffer->committing);
2496
2497 /* The next iteration still uses the original length */
2498 info->length = length;
2499 }
2420} 2500}
2421 2501
2422static struct ring_buffer_event * 2502static struct ring_buffer_event *
@@ -2426,14 +2506,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2426 struct ring_buffer_event *event; 2506 struct ring_buffer_event *event;
2427 struct buffer_page *tail_page; 2507 struct buffer_page *tail_page;
2428 unsigned long tail, write; 2508 unsigned long tail, write;
2429 2509 bool is_commit;
2430 /*
2431 * If the time delta since the last event is too big to
2432 * hold in the time field of the event, then we append a
2433 * TIME EXTEND event ahead of the data event.
2434 */
2435 if (unlikely(info->add_timestamp))
2436 info->length += RB_LEN_TIME_EXTEND;
2437 2510
2438 tail_page = info->tail_page = cpu_buffer->tail_page; 2511 tail_page = info->tail_page = cpu_buffer->tail_page;
2439 write = local_add_return(info->length, &tail_page->write); 2512 write = local_add_return(info->length, &tail_page->write);
@@ -2442,31 +2515,42 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2442 write &= RB_WRITE_MASK; 2515 write &= RB_WRITE_MASK;
2443 tail = write - info->length; 2516 tail = write - info->length;
2444 2517
2445 /*
2446 * If this is the first commit on the page, then it has the same
2447 * timestamp as the page itself.
2448 */
2449 if (!tail)
2450 info->delta = 0;
2451
2452 /* See if we shot pass the end of this buffer page */ 2518 /* See if we shot pass the end of this buffer page */
2453 if (unlikely(write > BUF_PAGE_SIZE)) 2519 if (unlikely(write > BUF_PAGE_SIZE))
2454 return rb_move_tail(cpu_buffer, tail, info); 2520 return rb_move_tail(cpu_buffer, tail, info);
2455 2521
2456 /* We reserved something on the buffer */ 2522 /* We reserved something on the buffer */
2457
2458 event = __rb_page_index(tail_page, tail); 2523 event = __rb_page_index(tail_page, tail);
2459 kmemcheck_annotate_bitfield(event, bitfield);
2460 rb_update_event(cpu_buffer, event, info);
2461
2462 local_inc(&tail_page->entries);
2463 2524
2464 /* 2525 /*
2465 * If this is the first commit on the page, then update 2526 * If this is the first commit on the page, then it has the same
2466 * its timestamp. 2527 * timestamp as the page itself, otherwise we need to figure out
2528 * the delta.
2467 */ 2529 */
2468 if (!tail) 2530 info->ts = rb_time_stamp(cpu_buffer->buffer);
2531 is_commit = rb_event_is_commit(cpu_buffer, event);
2532
2533 /* Commits are special (non nested events) */
2534 info->delta = is_commit ? info->ts - cpu_buffer->write_stamp : 0;
2535
2536 if (!tail) {
2537 /*
2538 * If this is the first commit on the page, set the
2539 * page to its timestamp.
2540 */
2469 tail_page->page->time_stamp = info->ts; 2541 tail_page->page->time_stamp = info->ts;
2542 info->delta = 0;
2543
2544 } else if (unlikely(test_time_stamp(info->delta)) &&
2545 !info->add_timestamp) {
2546 rb_handle_timestamp(cpu_buffer, event, info);
2547 return ERR_PTR(-EAGAIN);
2548 }
2549
2550 kmemcheck_annotate_bitfield(event, bitfield);
2551 rb_update_event(cpu_buffer, event, info);
2552
2553 local_inc(&tail_page->entries);
2470 2554
2471 /* account for these added bytes */ 2555 /* account for these added bytes */
2472 local_add(info->length, &cpu_buffer->entries_bytes); 2556 local_add(info->length, &cpu_buffer->entries_bytes);
@@ -2560,7 +2644,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2560 struct ring_buffer_event *event; 2644 struct ring_buffer_event *event;
2561 struct rb_event_info info; 2645 struct rb_event_info info;
2562 int nr_loops = 0; 2646 int nr_loops = 0;
2563 u64 diff;
2564 2647
2565 rb_start_commit(cpu_buffer); 2648 rb_start_commit(cpu_buffer);
2566 2649
@@ -2578,12 +2661,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2578 return NULL; 2661 return NULL;
2579 } 2662 }
2580#endif 2663#endif
2581
2582 info.length = rb_calculate_event_length(length); 2664 info.length = rb_calculate_event_length(length);
2583 again:
2584 info.add_timestamp = 0; 2665 info.add_timestamp = 0;
2585 info.delta = 0; 2666 again:
2586
2587 /* 2667 /*
2588 * We allow for interrupts to reenter here and do a trace. 2668 * We allow for interrupts to reenter here and do a trace.
2589 * If one does, it will cause this original code to loop 2669 * If one does, it will cause this original code to loop
@@ -2596,19 +2676,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2596 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 2676 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2597 goto out_fail; 2677 goto out_fail;
2598 2678
2599 info.ts = rb_time_stamp(cpu_buffer->buffer);
2600 diff = info.ts - cpu_buffer->write_stamp;
2601
2602 /* make sure this diff is calculated here */
2603 barrier();
2604
2605 /* Did the write stamp get updated already? */
2606 if (likely(info.ts >= cpu_buffer->write_stamp)) {
2607 info.delta = diff;
2608 if (unlikely(test_time_stamp(info.delta)))
2609 rb_handle_timestamp(cpu_buffer, &info);
2610 }
2611
2612 event = __rb_reserve_next(cpu_buffer, &info); 2679 event = __rb_reserve_next(cpu_buffer, &info);
2613 2680
2614 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2681 if (unlikely(PTR_ERR(event) == -EAGAIN))