aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
author: Steven Rostedt (Red Hat) <rostedt@goodmis.org> 2015-05-29 12:12:27 -0400
committer: Steven Rostedt <rostedt@goodmis.org> 2015-07-20 22:30:49 -0400
commit: d90fd77402d3de56a9ca3df04e5d868d0979dc59 (patch)
tree: ed6ab987ed33a81e3c57b36233648f09512a7872 /kernel/trace
parent: 7d75e6833b579adb3de2c7b917de1204eeafea47 (diff)
ring-buffer: Reorganize function locations
Functions in ring_buffer.c have gotten interleaved between different use cases. Move the functions around to get like functions closer together. This may or may not help gcc keep cache locality, but it makes it a little easier to work with the code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- kernel/trace/ring_buffer.c 814
1 files changed, 403 insertions, 411 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 781ce359976c..1cce0fbf92ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1887,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
1887 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; 1887 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1888} 1888}
1889 1889
1890static inline int
1891rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1892 struct ring_buffer_event *event)
1893{
1894 unsigned long addr = (unsigned long)event;
1895 unsigned long index;
1896
1897 index = rb_event_index(event);
1898 addr &= PAGE_MASK;
1899
1900 return cpu_buffer->commit_page->page == (void *)addr &&
1901 rb_commit_index(cpu_buffer) == index;
1902}
1903
1904static void
1905rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1906{
1907 unsigned long max_count;
1908
1909 /*
1910 * We only race with interrupts and NMIs on this CPU.
1911 * If we own the commit event, then we can commit
1912 * all others that interrupted us, since the interruptions
1913 * are in stack format (they finish before they come
1914 * back to us). This allows us to do a simple loop to
1915 * assign the commit to the tail.
1916 */
1917 again:
1918 max_count = cpu_buffer->nr_pages * 100;
1919
1920 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1921 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1922 return;
1923 if (RB_WARN_ON(cpu_buffer,
1924 rb_is_reader_page(cpu_buffer->tail_page)))
1925 return;
1926 local_set(&cpu_buffer->commit_page->page->commit,
1927 rb_page_write(cpu_buffer->commit_page));
1928 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1929 cpu_buffer->write_stamp =
1930 cpu_buffer->commit_page->page->time_stamp;
1931 /* add barrier to keep gcc from optimizing too much */
1932 barrier();
1933 }
1934 while (rb_commit_index(cpu_buffer) !=
1935 rb_page_write(cpu_buffer->commit_page)) {
1936
1937 local_set(&cpu_buffer->commit_page->page->commit,
1938 rb_page_write(cpu_buffer->commit_page));
1939 RB_WARN_ON(cpu_buffer,
1940 local_read(&cpu_buffer->commit_page->page->commit) &
1941 ~RB_WRITE_MASK);
1942 barrier();
1943 }
1944
1945 /* again, keep gcc from optimizing */
1946 barrier();
1947
1948 /*
1949 * If an interrupt came in just after the first while loop
1950 * and pushed the tail page forward, we will be left with
1951 * a dangling commit that will never go forward.
1952 */
1953 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
1954 goto again;
1955}
1956
1957static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 1890static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1958{ 1891{
1959 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; 1892 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
@@ -1979,63 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1979 iter->head = 0; 1912 iter->head = 0;
1980} 1913}
1981 1914
1982/* Slow path, do not inline */
1983static noinline struct ring_buffer_event *
1984rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1985{
1986 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1987
1988 /* Not the first event on the page? */
1989 if (rb_event_index(event)) {
1990 event->time_delta = delta & TS_MASK;
1991 event->array[0] = delta >> TS_SHIFT;
1992 } else {
1993 /* nope, just zero it */
1994 event->time_delta = 0;
1995 event->array[0] = 0;
1996 }
1997
1998 return skip_time_extend(event);
1999}
2000
2001/**
2002 * rb_update_event - update event type and data
2003 * @event: the event to update
2004 * @type: the type of event
2005 * @length: the size of the event field in the ring buffer
2006 *
2007 * Update the type and data fields of the event. The length
2008 * is the actual size that is written to the ring buffer,
2009 * and with this, we can determine what to place into the
2010 * data field.
2011 */
2012static void __always_inline
2013rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2014 struct ring_buffer_event *event,
2015 struct rb_event_info *info)
2016{
2017 unsigned length = info->length;
2018 u64 delta = info->delta;
2019
2020 /*
2021 * If we need to add a timestamp, then we
2022 * add it to the start of the resevered space.
2023 */
2024 if (unlikely(info->add_timestamp)) {
2025 event = rb_add_time_stamp(event, delta);
2026 length -= RB_LEN_TIME_EXTEND;
2027 delta = 0;
2028 }
2029
2030 event->time_delta = delta;
2031 length -= RB_EVNT_HDR_SIZE;
2032 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2033 event->type_len = 0;
2034 event->array[0] = length;
2035 } else
2036 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2037}
2038
2039/* 1915/*
2040 * rb_handle_head_page - writer hit the head page 1916 * rb_handle_head_page - writer hit the head page
2041 * 1917 *
@@ -2194,38 +2070,6 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
2194 return 0; 2070 return 0;
2195} 2071}
2196 2072
2197static unsigned rb_calculate_event_length(unsigned length)
2198{
2199 struct ring_buffer_event event; /* Used only for sizeof array */
2200
2201 /* zero length can cause confusions */
2202 if (!length)
2203 length++;
2204
2205 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2206 length += sizeof(event.array[0]);
2207
2208 length += RB_EVNT_HDR_SIZE;
2209 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2210
2211 /*
2212 * In case the time delta is larger than the 27 bits for it
2213 * in the header, we need to add a timestamp. If another
2214 * event comes in when trying to discard this one to increase
2215 * the length, then the timestamp will be added in the allocated
2216 * space of this event. If length is bigger than the size needed
2217 * for the TIME_EXTEND, then padding has to be used. The events
2218 * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
2219 * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
2220 * As length is a multiple of 4, we only need to worry if it
2221 * is 12 (RB_LEN_TIME_EXTEND + 4).
2222 */
2223 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2224 length += RB_ALIGNMENT;
2225
2226 return length;
2227}
2228
2229static inline void 2073static inline void
2230rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, 2074rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2231 unsigned long tail, struct rb_event_info *info) 2075 unsigned long tail, struct rb_event_info *info)
@@ -2424,6 +2268,95 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2424 return NULL; 2268 return NULL;
2425} 2269}
2426 2270
2271/* Slow path, do not inline */
2272static noinline struct ring_buffer_event *
2273rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
2274{
2275 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2276
2277 /* Not the first event on the page? */
2278 if (rb_event_index(event)) {
2279 event->time_delta = delta & TS_MASK;
2280 event->array[0] = delta >> TS_SHIFT;
2281 } else {
2282 /* nope, just zero it */
2283 event->time_delta = 0;
2284 event->array[0] = 0;
2285 }
2286
2287 return skip_time_extend(event);
2288}
2289
2290/**
2291 * rb_update_event - update event type and data
2292 * @event: the event to update
2293 * @type: the type of event
2294 * @length: the size of the event field in the ring buffer
2295 *
2296 * Update the type and data fields of the event. The length
2297 * is the actual size that is written to the ring buffer,
2298 * and with this, we can determine what to place into the
2299 * data field.
2300 */
2301static void __always_inline
2302rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2303 struct ring_buffer_event *event,
2304 struct rb_event_info *info)
2305{
2306 unsigned length = info->length;
2307 u64 delta = info->delta;
2308
2309 /*
2310 * If we need to add a timestamp, then we
2311 * add it to the start of the resevered space.
2312 */
2313 if (unlikely(info->add_timestamp)) {
2314 event = rb_add_time_stamp(event, delta);
2315 length -= RB_LEN_TIME_EXTEND;
2316 delta = 0;
2317 }
2318
2319 event->time_delta = delta;
2320 length -= RB_EVNT_HDR_SIZE;
2321 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2322 event->type_len = 0;
2323 event->array[0] = length;
2324 } else
2325 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2326}
2327
2328static unsigned rb_calculate_event_length(unsigned length)
2329{
2330 struct ring_buffer_event event; /* Used only for sizeof array */
2331
2332 /* zero length can cause confusions */
2333 if (!length)
2334 length++;
2335
2336 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2337 length += sizeof(event.array[0]);
2338
2339 length += RB_EVNT_HDR_SIZE;
2340 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2341
2342 /*
2343 * In case the time delta is larger than the 27 bits for it
2344 * in the header, we need to add a timestamp. If another
2345 * event comes in when trying to discard this one to increase
2346 * the length, then the timestamp will be added in the allocated
2347 * space of this event. If length is bigger than the size needed
2348 * for the TIME_EXTEND, then padding has to be used. The events
2349 * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
2350 * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
2351 * As length is a multiple of 4, we only need to worry if it
2352 * is 12 (RB_LEN_TIME_EXTEND + 4).
2353 */
2354 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2355 length += RB_ALIGNMENT;
2356
2357 return length;
2358}
2359
2427#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 2360#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2428static inline bool sched_clock_stable(void) 2361static inline bool sched_clock_stable(void)
2429{ 2362{
@@ -2433,11 +2366,322 @@ static inline bool sched_clock_stable(void)
2433 2366
2434static inline int 2367static inline int
2435rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, 2368rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2436 struct ring_buffer_event *event); 2369 struct ring_buffer_event *event)
2437static inline void rb_event_discard(struct ring_buffer_event *event); 2370{
2371 unsigned long new_index, old_index;
2372 struct buffer_page *bpage;
2373 unsigned long index;
2374 unsigned long addr;
2375
2376 new_index = rb_event_index(event);
2377 old_index = new_index + rb_event_ts_length(event);
2378 addr = (unsigned long)event;
2379 addr &= PAGE_MASK;
2380
2381 bpage = cpu_buffer->tail_page;
2382
2383 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2384 unsigned long write_mask =
2385 local_read(&bpage->write) & ~RB_WRITE_MASK;
2386 unsigned long event_length = rb_event_length(event);
2387 /*
2388 * This is on the tail page. It is possible that
2389 * a write could come in and move the tail page
2390 * and write to the next page. That is fine
2391 * because we just shorten what is on this page.
2392 */
2393 old_index += write_mask;
2394 new_index += write_mask;
2395 index = local_cmpxchg(&bpage->write, old_index, new_index);
2396 if (index == old_index) {
2397 /* update counters */
2398 local_sub(event_length, &cpu_buffer->entries_bytes);
2399 return 1;
2400 }
2401 }
2402
2403 /* could not discard */
2404 return 0;
2405}
2406
2407static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2408{
2409 local_inc(&cpu_buffer->committing);
2410 local_inc(&cpu_buffer->commits);
2411}
2412
2413static void
2414rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2415{
2416 unsigned long max_count;
2417
2418 /*
2419 * We only race with interrupts and NMIs on this CPU.
2420 * If we own the commit event, then we can commit
2421 * all others that interrupted us, since the interruptions
2422 * are in stack format (they finish before they come
2423 * back to us). This allows us to do a simple loop to
2424 * assign the commit to the tail.
2425 */
2426 again:
2427 max_count = cpu_buffer->nr_pages * 100;
2428
2429 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
2430 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2431 return;
2432 if (RB_WARN_ON(cpu_buffer,
2433 rb_is_reader_page(cpu_buffer->tail_page)))
2434 return;
2435 local_set(&cpu_buffer->commit_page->page->commit,
2436 rb_page_write(cpu_buffer->commit_page));
2437 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
2438 cpu_buffer->write_stamp =
2439 cpu_buffer->commit_page->page->time_stamp;
2440 /* add barrier to keep gcc from optimizing too much */
2441 barrier();
2442 }
2443 while (rb_commit_index(cpu_buffer) !=
2444 rb_page_write(cpu_buffer->commit_page)) {
2445
2446 local_set(&cpu_buffer->commit_page->page->commit,
2447 rb_page_write(cpu_buffer->commit_page));
2448 RB_WARN_ON(cpu_buffer,
2449 local_read(&cpu_buffer->commit_page->page->commit) &
2450 ~RB_WRITE_MASK);
2451 barrier();
2452 }
2453
2454 /* again, keep gcc from optimizing */
2455 barrier();
2456
2457 /*
2458 * If an interrupt came in just after the first while loop
2459 * and pushed the tail page forward, we will be left with
2460 * a dangling commit that will never go forward.
2461 */
2462 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
2463 goto again;
2464}
2465
2466static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2467{
2468 unsigned long commits;
2469
2470 if (RB_WARN_ON(cpu_buffer,
2471 !local_read(&cpu_buffer->committing)))
2472 return;
2473
2474 again:
2475 commits = local_read(&cpu_buffer->commits);
2476 /* synchronize with interrupts */
2477 barrier();
2478 if (local_read(&cpu_buffer->committing) == 1)
2479 rb_set_commit_to_write(cpu_buffer);
2480
2481 local_dec(&cpu_buffer->committing);
2482
2483 /* synchronize with interrupts */
2484 barrier();
2485
2486 /*
2487 * Need to account for interrupts coming in between the
2488 * updating of the commit page and the clearing of the
2489 * committing counter.
2490 */
2491 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2492 !local_read(&cpu_buffer->committing)) {
2493 local_inc(&cpu_buffer->committing);
2494 goto again;
2495 }
2496}
2497
2498static inline void rb_event_discard(struct ring_buffer_event *event)
2499{
2500 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2501 event = skip_time_extend(event);
2502
2503 /* array[0] holds the actual length for the discarded event */
2504 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2505 event->type_len = RINGBUF_TYPE_PADDING;
2506 /* time delta must be non zero */
2507 if (!event->time_delta)
2508 event->time_delta = 1;
2509}
2510
2511static inline int
2512rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2513 struct ring_buffer_event *event)
2514{
2515 unsigned long addr = (unsigned long)event;
2516 unsigned long index;
2517
2518 index = rb_event_index(event);
2519 addr &= PAGE_MASK;
2520
2521 return cpu_buffer->commit_page->page == (void *)addr &&
2522 rb_commit_index(cpu_buffer) == index;
2523}
2524
2438static void 2525static void
2439rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2526rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2440 struct ring_buffer_event *event); 2527 struct ring_buffer_event *event)
2528{
2529 u64 delta;
2530
2531 /*
2532 * The event first in the commit queue updates the
2533 * time stamp.
2534 */
2535 if (rb_event_is_commit(cpu_buffer, event)) {
2536 /*
2537 * A commit event that is first on a page
2538 * updates the write timestamp with the page stamp
2539 */
2540 if (!rb_event_index(event))
2541 cpu_buffer->write_stamp =
2542 cpu_buffer->commit_page->page->time_stamp;
2543 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2544 delta = event->array[0];
2545 delta <<= TS_SHIFT;
2546 delta += event->time_delta;
2547 cpu_buffer->write_stamp += delta;
2548 } else
2549 cpu_buffer->write_stamp += event->time_delta;
2550 }
2551}
2552
2553static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2554 struct ring_buffer_event *event)
2555{
2556 local_inc(&cpu_buffer->entries);
2557 rb_update_write_stamp(cpu_buffer, event);
2558 rb_end_commit(cpu_buffer);
2559}
2560
2561static __always_inline void
2562rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2563{
2564 bool pagebusy;
2565
2566 if (buffer->irq_work.waiters_pending) {
2567 buffer->irq_work.waiters_pending = false;
2568 /* irq_work_queue() supplies it's own memory barriers */
2569 irq_work_queue(&buffer->irq_work.work);
2570 }
2571
2572 if (cpu_buffer->irq_work.waiters_pending) {
2573 cpu_buffer->irq_work.waiters_pending = false;
2574 /* irq_work_queue() supplies it's own memory barriers */
2575 irq_work_queue(&cpu_buffer->irq_work.work);
2576 }
2577
2578 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2579
2580 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2581 cpu_buffer->irq_work.wakeup_full = true;
2582 cpu_buffer->irq_work.full_waiters_pending = false;
2583 /* irq_work_queue() supplies it's own memory barriers */
2584 irq_work_queue(&cpu_buffer->irq_work.work);
2585 }
2586}
2587
2588/*
2589 * The lock and unlock are done within a preempt disable section.
2590 * The current_context per_cpu variable can only be modified
2591 * by the current task between lock and unlock. But it can
2592 * be modified more than once via an interrupt. To pass this
2593 * information from the lock to the unlock without having to
2594 * access the 'in_interrupt()' functions again (which do show
2595 * a bit of overhead in something as critical as function tracing,
2596 * we use a bitmask trick.
2597 *
2598 * bit 0 = NMI context
2599 * bit 1 = IRQ context
2600 * bit 2 = SoftIRQ context
2601 * bit 3 = normal context.
2602 *
2603 * This works because this is the order of contexts that can
2604 * preempt other contexts. A SoftIRQ never preempts an IRQ
2605 * context.
2606 *
2607 * When the context is determined, the corresponding bit is
2608 * checked and set (if it was set, then a recursion of that context
2609 * happened).
2610 *
2611 * On unlock, we need to clear this bit. To do so, just subtract
2612 * 1 from the current_context and AND it to itself.
2613 *
2614 * (binary)
2615 * 101 - 1 = 100
2616 * 101 & 100 = 100 (clearing bit zero)
2617 *
2618 * 1010 - 1 = 1001
2619 * 1010 & 1001 = 1000 (clearing bit 1)
2620 *
2621 * The least significant bit can be cleared this way, and it
2622 * just so happens that it is the same bit corresponding to
2623 * the current context.
2624 */
2625
2626static __always_inline int
2627trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2628{
2629 unsigned int val = cpu_buffer->current_context;
2630 int bit;
2631
2632 if (in_interrupt()) {
2633 if (in_nmi())
2634 bit = RB_CTX_NMI;
2635 else if (in_irq())
2636 bit = RB_CTX_IRQ;
2637 else
2638 bit = RB_CTX_SOFTIRQ;
2639 } else
2640 bit = RB_CTX_NORMAL;
2641
2642 if (unlikely(val & (1 << bit)))
2643 return 1;
2644
2645 val |= (1 << bit);
2646 cpu_buffer->current_context = val;
2647
2648 return 0;
2649}
2650
2651static __always_inline void
2652trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2653{
2654 cpu_buffer->current_context &= cpu_buffer->current_context - 1;
2655}
2656
2657/**
2658 * ring_buffer_unlock_commit - commit a reserved
2659 * @buffer: The buffer to commit to
2660 * @event: The event pointer to commit.
2661 *
2662 * This commits the data to the ring buffer, and releases any locks held.
2663 *
2664 * Must be paired with ring_buffer_lock_reserve.
2665 */
2666int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2667 struct ring_buffer_event *event)
2668{
2669 struct ring_buffer_per_cpu *cpu_buffer;
2670 int cpu = raw_smp_processor_id();
2671
2672 cpu_buffer = buffer->buffers[cpu];
2673
2674 rb_commit(cpu_buffer, event);
2675
2676 rb_wakeups(buffer, cpu_buffer);
2677
2678 trace_recursive_unlock(cpu_buffer);
2679
2680 preempt_enable_notrace();
2681
2682 return 0;
2683}
2684EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2441 2685
2442static noinline void 2686static noinline void
2443rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, 2687rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2573,84 +2817,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2573 return event; 2817 return event;
2574} 2818}
2575 2819
2576static inline int
2577rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2578 struct ring_buffer_event *event)
2579{
2580 unsigned long new_index, old_index;
2581 struct buffer_page *bpage;
2582 unsigned long index;
2583 unsigned long addr;
2584
2585 new_index = rb_event_index(event);
2586 old_index = new_index + rb_event_ts_length(event);
2587 addr = (unsigned long)event;
2588 addr &= PAGE_MASK;
2589
2590 bpage = cpu_buffer->tail_page;
2591
2592 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2593 unsigned long write_mask =
2594 local_read(&bpage->write) & ~RB_WRITE_MASK;
2595 unsigned long event_length = rb_event_length(event);
2596 /*
2597 * This is on the tail page. It is possible that
2598 * a write could come in and move the tail page
2599 * and write to the next page. That is fine
2600 * because we just shorten what is on this page.
2601 */
2602 old_index += write_mask;
2603 new_index += write_mask;
2604 index = local_cmpxchg(&bpage->write, old_index, new_index);
2605 if (index == old_index) {
2606 /* update counters */
2607 local_sub(event_length, &cpu_buffer->entries_bytes);
2608 return 1;
2609 }
2610 }
2611
2612 /* could not discard */
2613 return 0;
2614}
2615
2616static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2617{
2618 local_inc(&cpu_buffer->committing);
2619 local_inc(&cpu_buffer->commits);
2620}
2621
2622static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2623{
2624 unsigned long commits;
2625
2626 if (RB_WARN_ON(cpu_buffer,
2627 !local_read(&cpu_buffer->committing)))
2628 return;
2629
2630 again:
2631 commits = local_read(&cpu_buffer->commits);
2632 /* synchronize with interrupts */
2633 barrier();
2634 if (local_read(&cpu_buffer->committing) == 1)
2635 rb_set_commit_to_write(cpu_buffer);
2636
2637 local_dec(&cpu_buffer->committing);
2638
2639 /* synchronize with interrupts */
2640 barrier();
2641
2642 /*
2643 * Need to account for interrupts coming in between the
2644 * updating of the commit page and the clearing of the
2645 * committing counter.
2646 */
2647 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2648 !local_read(&cpu_buffer->committing)) {
2649 local_inc(&cpu_buffer->committing);
2650 goto again;
2651 }
2652}
2653
2654static struct ring_buffer_event * 2820static struct ring_buffer_event *
2655rb_reserve_next_event(struct ring_buffer *buffer, 2821rb_reserve_next_event(struct ring_buffer *buffer,
2656 struct ring_buffer_per_cpu *cpu_buffer, 2822 struct ring_buffer_per_cpu *cpu_buffer,
@@ -2706,75 +2872,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2706 return NULL; 2872 return NULL;
2707} 2873}
2708 2874
2709/*
2710 * The lock and unlock are done within a preempt disable section.
2711 * The current_context per_cpu variable can only be modified
2712 * by the current task between lock and unlock. But it can
2713 * be modified more than once via an interrupt. To pass this
2714 * information from the lock to the unlock without having to
2715 * access the 'in_interrupt()' functions again (which do show
2716 * a bit of overhead in something as critical as function tracing,
2717 * we use a bitmask trick.
2718 *
2719 * bit 0 = NMI context
2720 * bit 1 = IRQ context
2721 * bit 2 = SoftIRQ context
2722 * bit 3 = normal context.
2723 *
2724 * This works because this is the order of contexts that can
2725 * preempt other contexts. A SoftIRQ never preempts an IRQ
2726 * context.
2727 *
2728 * When the context is determined, the corresponding bit is
2729 * checked and set (if it was set, then a recursion of that context
2730 * happened).
2731 *
2732 * On unlock, we need to clear this bit. To do so, just subtract
2733 * 1 from the current_context and AND it to itself.
2734 *
2735 * (binary)
2736 * 101 - 1 = 100
2737 * 101 & 100 = 100 (clearing bit zero)
2738 *
2739 * 1010 - 1 = 1001
2740 * 1010 & 1001 = 1000 (clearing bit 1)
2741 *
2742 * The least significant bit can be cleared this way, and it
2743 * just so happens that it is the same bit corresponding to
2744 * the current context.
2745 */
2746
2747static __always_inline int
2748trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2749{
2750 unsigned int val = cpu_buffer->current_context;
2751 int bit;
2752
2753 if (in_interrupt()) {
2754 if (in_nmi())
2755 bit = RB_CTX_NMI;
2756 else if (in_irq())
2757 bit = RB_CTX_IRQ;
2758 else
2759 bit = RB_CTX_SOFTIRQ;
2760 } else
2761 bit = RB_CTX_NORMAL;
2762
2763 if (unlikely(val & (1 << bit)))
2764 return 1;
2765
2766 val |= (1 << bit);
2767 cpu_buffer->current_context = val;
2768
2769 return 0;
2770}
2771
2772static __always_inline void
2773trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2774{
2775 cpu_buffer->current_context &= cpu_buffer->current_context - 1;
2776}
2777
2778/** 2875/**
2779 * ring_buffer_lock_reserve - reserve a part of the buffer 2876 * ring_buffer_lock_reserve - reserve a part of the buffer
2780 * @buffer: the ring buffer to reserve from 2877 * @buffer: the ring buffer to reserve from
@@ -2833,111 +2930,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2833} 2930}
2834EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2931EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
2835 2932
2836static void
2837rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2838 struct ring_buffer_event *event)
2839{
2840 u64 delta;
2841
2842 /*
2843 * The event first in the commit queue updates the
2844 * time stamp.
2845 */
2846 if (rb_event_is_commit(cpu_buffer, event)) {
2847 /*
2848 * A commit event that is first on a page
2849 * updates the write timestamp with the page stamp
2850 */
2851 if (!rb_event_index(event))
2852 cpu_buffer->write_stamp =
2853 cpu_buffer->commit_page->page->time_stamp;
2854 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2855 delta = event->array[0];
2856 delta <<= TS_SHIFT;
2857 delta += event->time_delta;
2858 cpu_buffer->write_stamp += delta;
2859 } else
2860 cpu_buffer->write_stamp += event->time_delta;
2861 }
2862}
2863
2864static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2865 struct ring_buffer_event *event)
2866{
2867 local_inc(&cpu_buffer->entries);
2868 rb_update_write_stamp(cpu_buffer, event);
2869 rb_end_commit(cpu_buffer);
2870}
2871
2872static __always_inline void
2873rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2874{
2875 bool pagebusy;
2876
2877 if (buffer->irq_work.waiters_pending) {
2878 buffer->irq_work.waiters_pending = false;
2879 /* irq_work_queue() supplies it's own memory barriers */
2880 irq_work_queue(&buffer->irq_work.work);
2881 }
2882
2883 if (cpu_buffer->irq_work.waiters_pending) {
2884 cpu_buffer->irq_work.waiters_pending = false;
2885 /* irq_work_queue() supplies it's own memory barriers */
2886 irq_work_queue(&cpu_buffer->irq_work.work);
2887 }
2888
2889 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2890
2891 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2892 cpu_buffer->irq_work.wakeup_full = true;
2893 cpu_buffer->irq_work.full_waiters_pending = false;
2894 /* irq_work_queue() supplies it's own memory barriers */
2895 irq_work_queue(&cpu_buffer->irq_work.work);
2896 }
2897}
2898
2899/**
2900 * ring_buffer_unlock_commit - commit a reserved
2901 * @buffer: The buffer to commit to
2902 * @event: The event pointer to commit.
2903 *
2904 * This commits the data to the ring buffer, and releases any locks held.
2905 *
2906 * Must be paired with ring_buffer_lock_reserve.
2907 */
2908int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2909 struct ring_buffer_event *event)
2910{
2911 struct ring_buffer_per_cpu *cpu_buffer;
2912 int cpu = raw_smp_processor_id();
2913
2914 cpu_buffer = buffer->buffers[cpu];
2915
2916 rb_commit(cpu_buffer, event);
2917
2918 rb_wakeups(buffer, cpu_buffer);
2919
2920 trace_recursive_unlock(cpu_buffer);
2921
2922 preempt_enable_notrace();
2923
2924 return 0;
2925}
2926EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2927
2928static inline void rb_event_discard(struct ring_buffer_event *event)
2929{
2930 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2931 event = skip_time_extend(event);
2932
2933 /* array[0] holds the actual length for the discarded event */
2934 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2935 event->type_len = RINGBUF_TYPE_PADDING;
2936 /* time delta must be non zero */
2937 if (!event->time_delta)
2938 event->time_delta = 1;
2939}
2940
2941/* 2933/*
2942 * Decrement the entries to the page that an event is on. 2934 * Decrement the entries to the page that an event is on.
2943 * The event does not even need to exist, only the pointer 2935 * The event does not even need to exist, only the pointer