Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c | 234
1 file changed, 178 insertions(+), 56 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41ca394feb22..3632ce87674f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+/* Missed count stored at end */
+#define RB_MISSED_STORED	(1 << 30)
+
 struct buffer_data_page {
 	u64		time_stamp;	/* page time stamp */
 	local_t		commit;		/* write committed index */
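The two new flags occupy the top bits of the data page's commit word, so the low bits still carry the data length. As a rough illustration of that layout, here is a minimal user-space sketch that splits a commit word using the flag values above (the constants are written as unsigned here; the sample length is made up):

    #include <stdio.h>

    #define RB_MISSED_EVENTS	(1UL << 31)	/* events were overwritten */
    #define RB_MISSED_STORED	(1UL << 30)	/* missed count stored at end of page */

    /* Split a commit word into the data length and the two flags. */
    static void decode_commit(unsigned long commit)
    {
            unsigned long len = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);

            printf("data length: %lu, missed events: %s, count stored: %s\n",
                   len,
                   (commit & RB_MISSED_EVENTS) ? "yes" : "no",
                   (commit & RB_MISSED_STORED) ? "yes" : "no");
    }

    int main(void)
    {
            decode_commit(4080UL | RB_MISSED_EVENTS | RB_MISSED_STORED);
            return 0;
    }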
@@ -338,6 +343,7 @@ struct buffer_page {
 	local_t		write;		/* index for next write */
 	unsigned	read;		/* index for next read */
 	local_t		entries;	/* entries on this page */
+	unsigned long	real_end;	/* real end of data */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
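With the buffer_data_page layout above (a u64 time_stamp followed by the local_t commit word), the new trace_seq_printf() call advertises the overwrite flag as a one-byte field overlapping the commit word. On a typical 64-bit build the extra line in the exported page header should look roughly like:

    field: int overwrite;	offset:8;	size:1;	signed:1;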
@@ -431,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
  */
 struct ring_buffer_per_cpu {
 	int				cpu;
+	atomic_t			record_disabled;
 	struct ring_buffer		*buffer;
 	spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
@@ -440,6 +453,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -448,7 +463,6 @@ struct ring_buffer_per_cpu {
 	unsigned long			read;
 	u64				write_stamp;
 	u64				read_stamp;
-	atomic_t			record_disabled;
 };
 
 struct ring_buffer {
@@ -1754,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * must fill the old tail_page with padding.
 	 */
 	if (tail >= BUF_PAGE_SIZE) {
+		/*
+		 * If the page was filled, then we still need
+		 * to update the real_end. Reset it to zero
+		 * and the reader will ignore it.
+		 */
+		if (tail == BUF_PAGE_SIZE)
+			tail_page->real_end = 0;
+
 		local_sub(length, &tail_page->write);
 		return;
 	}
@@ -1762,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	kmemcheck_annotate_bitfield(event, bitfield);
 
 	/*
+	 * Save the original length to the meta data.
+	 * This will be used by the reader to add lost event
+	 * counter.
+	 */
+	tail_page->real_end = tail;
+
+	/*
 	 * If this event is bigger than the minimum size, then
 	 * we need to be careful that we don't subtract the
 	 * write counter enough to allow another writer to slip
@@ -1979,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
 {
 	struct ring_buffer_event *event;
-	static int once;
 	int ret;
 
-	if (unlikely(*delta > (1ULL << 59) && !once++)) {
-		printk(KERN_WARNING "Delta way too big! %llu"
-		       " ts=%llu write stamp = %llu\n",
-		       (unsigned long long)*delta,
-		       (unsigned long long)*ts,
-		       (unsigned long long)cpu_buffer->write_stamp);
-		WARN_ON(1);
-	}
+	WARN_ONCE(*delta > (1ULL << 59),
+		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+		  (unsigned long long)*delta,
+		  (unsigned long long)*ts,
+		  (unsigned long long)cpu_buffer->write_stamp);
 
 	/*
 	 * The delta is too big, we to add a
@@ -2217,8 +2242,6 @@ static void trace_recursive_unlock(void)
 
 #endif
 
-static DEFINE_PER_CPU(int, rb_need_resched);
-
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2239,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	int cpu, resched;
+	int cpu;
 
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
 	/* If we are tracing schedule, we don't want to recurse */
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 
 	if (atomic_read(&buffer->record_disabled))
 		goto out_nocheck;
@@ -2270,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (!event)
 		goto out;
 
-	/*
-	 * Need to store resched state on this cpu.
-	 * Only the first needs to.
-	 */
-
-	if (preempt_count() == 1)
-		per_cpu(rb_need_resched, cpu) = resched;
-
 	return event;
 
  out:
 	trace_recursive_unlock();
 
  out_nocheck:
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2330,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	trace_recursive_unlock();
 
-	/*
-	 * Only the last preempt count needs to restore preemption.
-	 */
-	if (preempt_count() == 1)
-		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-	else
-		preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
 
 	return 0;
 }
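With the per-cpu rb_need_resched bookkeeping gone, the reserve/commit pair simply brackets the write with preempt_disable_notrace()/preempt_enable_notrace(). A minimal writer sketch against this API (struct my_entry and write_sample() are made up for illustration):

    struct my_entry {
            unsigned long	val;
    };

    static void write_sample(struct ring_buffer *buffer, unsigned long val)
    {
            struct ring_buffer_event *event;
            struct my_entry *entry;

            /* reserve space; preemption stays disabled until the commit */
            event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
            if (!event)
                    return;		/* recording disabled or no room */

            entry = ring_buffer_event_data(event);
            entry->val = val;

            /* commit re-enables preemption via preempt_enable_notrace() */
            ring_buffer_unlock_commit(buffer, event);
    }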
@@ -2444,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 
 	trace_recursive_unlock();
 
-	/*
-	 * Only the last preempt count needs to restore preemption.
-	 */
-	if (preempt_count() == 1)
-		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-	else
-		preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
 
 }
 EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2476,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	void *body;
 	int ret = -EBUSY;
-	int cpu, resched;
+	int cpu;
 
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 
 	if (atomic_read(&buffer->record_disabled))
 		goto out;
@@ -2511,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 
 	return ret;
 }
@@ -2838,6 +2841,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2879,6 +2883,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
+	cpu_buffer->reader_page->real_end = 0;
 
  spin:
 	/*
@@ -2899,6 +2904,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
 	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
+	/*
 	 * Here's the tricky part.
 	 *
 	 * We need to move the pointer past the header page.
@@ -2929,6 +2946,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
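The lost-event count is taken as a plain difference of two unsigned long counters, which stays correct even if the overrun counter wraps around. A stand-alone sketch of that property (hypothetical values, not kernel code):

    #include <assert.h>

    int main(void)
    {
            /* the counter wrapped past ULONG_MAX between the two samples */
            unsigned long last_overrun = (unsigned long)-3;	/* ULONG_MAX - 2 */
            unsigned long overwrite    = 4;			/* value after the wrap */

            /* modular subtraction still yields the 7 newly lost events */
            assert(overwrite - last_overrun == 7);
            return 0;
    }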
@@ -3005,8 +3027,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3058,6 +3086,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3168,12 +3198,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * This will return the event that will be read next, but does
  * not consume the data.
  */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3188,7 +3220,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3230,13 +3262,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * Returns the next event in the ring buffer, and that event is consumed.
  * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
  */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3257,9 +3293,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
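A consumer of the updated API can now learn how many events were dropped since its previous read by passing a &lost variable; passing NULL keeps the old behaviour. A rough reader-loop sketch (drain_cpu() and process_event() are stand-ins for whatever the caller does with the payload):

    static void drain_cpu(struct ring_buffer *buffer, int cpu)
    {
            struct ring_buffer_event *event;
            unsigned long lost;
            u64 ts;

            while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
                    if (lost)
                            pr_info("cpu %d: %lu events lost\n", cpu, lost);

                    /* hypothetical helper for the event payload */
                    process_event(ring_buffer_event_data(event), ts);
            }
    }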
@@ -3276,23 +3314,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
  *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer. Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
  *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3306,15 +3351,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized. Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
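Splitting the old ring_buffer_read_start() into prepare/sync/start means a caller iterating many CPUs pays for a single synchronize_sched() instead of one per CPU. A sketch of the intended calling sequence (error handling trimmed; the iter[] array is just for illustration):

    struct ring_buffer_iter *iter[NR_CPUS];
    int cpu;

    /* 1) prepare an iterator per cpu; recording is disabled here */
    for_each_online_cpu(cpu)
            iter[cpu] = ring_buffer_read_prepare(buffer, cpu);

    /* 2) one ring_buffer_read_prepare_sync() covers every prepared iterator */
    ring_buffer_read_prepare_sync();

    /* 3) only now may the iterators actually be started */
    for_each_online_cpu(cpu)
            ring_buffer_read_start(iter[cpu]);

    /* ... iterate with ring_buffer_iter_peek()/ring_buffer_read() ... */

    for_each_online_cpu(cpu)
            ring_buffer_read_finish(iter[cpu]);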
@@ -3408,6 +3490,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3683,6 +3768,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	struct buffer_page *reader;
+	unsigned long missed_events;
 	unsigned long flags;
 	unsigned int commit;
 	unsigned int read;
@@ -3719,6 +3805,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	missed_events = cpu_buffer->lost_events;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3779,9 +3868,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
+
+		/*
+		 * Use the real_end for the data size,
+		 * This gives us a chance to store the lost events
+		 * on the page.
+		 */
+		if (reader->real_end)
+			local_set(&bpage->commit, reader->real_end);
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+
+	commit = local_read(&bpage->commit);
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events) {
+		/* If there is room at the end of the page to save the
+		 * missed events, then record it there.
+		 */
+		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+			memcpy(&bpage->data[commit], &missed_events,
+			       sizeof(missed_events));
+			local_add(RB_MISSED_STORED, &bpage->commit);
+			commit += sizeof(missed_events);
+		}
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+	}
+
+	/*
+	 * This page may be off to user land. Zero it out here.
+	 */
+	if (commit < BUF_PAGE_SIZE)
+		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
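On the consuming side of ring_buffer_read_page(), the flags and the appended count can be recovered from the returned page (obtained earlier via ring_buffer_alloc_read_page()). A reader-side sketch consistent with the writer logic above; the exact decode and the page_missed_events() helper are assumptions drawn from this diff, not a documented ABI:

    /* Recover the missed-event count from a page filled by
     * ring_buffer_read_page().  The flags sit in the top bits of the
     * 32-bit commit value; the low bits are still the data length.
     */
    static unsigned long page_missed_events(struct buffer_data_page *bpage)
    {
            unsigned int commit = (unsigned int)local_read(&bpage->commit);
            unsigned long missed = 0;

            if (commit & RB_MISSED_EVENTS) {
                    unsigned int len = commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);

                    if (commit & RB_MISSED_STORED)
                            /* the count was appended right after the data */
                            memcpy(&missed, &bpage->data[len], sizeof(missed));
                    else
                            missed = (unsigned long)-1;	/* lost, but count unknown */
            }
            return missed;
    }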