Diffstat (limited to 'kernel/trace/ring_buffer.c')

 kernel/trace/ring_buffer.c | 194 ++++++++++++++++++++++++++++++--------
 1 file changed, 169 insertions(+), 25 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41ca394feb22..1da7b6ea8b85 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+/* Missed count stored at end */
+#define RB_MISSED_STORED	(1 << 30)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
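
The two new flags occupy the top bits of a data page's commit word, whose low bits continue to hold the number of bytes of event data on the page. A minimal sketch of how a reader handed such a word could recover the plain length (the helper name is invented for illustration and is not part of this patch):

	/*
	 * Sketch only: bits 30 and 31 carry the flags above, the
	 * remaining low bits are the length of the event data.
	 */
	static unsigned long rb_sketch_data_len(unsigned long commit)
	{
		return commit & ~(RB_MISSED_EVENTS | RB_MISSED_STORED);
	}
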
@@ -338,6 +343,7 @@ struct buffer_page {
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
 	local_t		 entries;	/* entries on this page */
+	unsigned long	 real_end;	/* real end of data */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
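
With this hunk, the page-format description exported to tooling gains an "overwrite" pseudo-field that aliases the commit word. On a 64-bit build where commit follows the 8-byte time_stamp, the new line would render roughly as below; the offset is an assumption and depends on the architecture:

	field: int overwrite;	offset:8;	size:1;	signed:1;
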
@@ -440,6 +452,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -1754,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * must fill the old tail_page with padding.
 	 */
 	if (tail >= BUF_PAGE_SIZE) {
+		/*
+		 * If the page was filled, then we still need
+		 * to update the real_end. Reset it to zero
+		 * and the reader will ignore it.
+		 */
+		if (tail == BUF_PAGE_SIZE)
+			tail_page->real_end = 0;
+
 		local_sub(length, &tail_page->write);
 		return;
 	}
@@ -1762,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	kmemcheck_annotate_bitfield(event, bitfield);
 
 	/*
+	 * Save the original length to the meta data.
+	 * This will be used by the reader to add lost event
+	 * counter.
+	 */
+	tail_page->real_end = tail;
+
+	/*
 	 * If this event is bigger than the minimum size, then
 	 * we need to be careful that we don't subtract the
 	 * write counter enough to allow another writer to slip
@@ -1979,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
 {
 	struct ring_buffer_event *event;
-	static int once;
 	int ret;
 
-	if (unlikely(*delta > (1ULL << 59) && !once++)) {
-		printk(KERN_WARNING "Delta way too big! %llu"
-		       " ts=%llu write stamp = %llu\n",
-		       (unsigned long long)*delta,
-		       (unsigned long long)*ts,
-		       (unsigned long long)cpu_buffer->write_stamp);
-		WARN_ON(1);
-	}
+	WARN_ONCE(*delta > (1ULL << 59),
+		  KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+		  (unsigned long long)*delta,
+		  (unsigned long long)*ts,
+		  (unsigned long long)cpu_buffer->write_stamp);
 
 	/*
 	 * The delta is too big, we to add a
@@ -2838,6 +2863,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2879,6 +2905,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
+	cpu_buffer->reader_page->real_end = 0;
 
  spin:
 	/*
@@ -2899,6 +2926,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
 	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
+	/*
 	 * Here's the tricky part.
 	 *
 	 * We need to move the pointer past the header page.
@@ -2929,6 +2968,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
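
Both counters are free-running totals, so the number of events dropped since the reader's last page swap falls out of a simple subtraction. A worked example with invented numbers:

	/*
	 * Suppose the writer's running overrun count was 120 at the
	 * previous reader-page swap (last_overrun) and reads 157 now
	 * (overwrite):
	 */
	lost_events  = 157 - 120;	/* 37 events dropped since last read */
	last_overrun = 157;		/* new baseline for the next swap */
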
@@ -3005,8 +3049,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3058,6 +3108,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3168,12 +3220,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * This will return the event that will be read next, but does
 * not consume the data.
  */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3188,7 +3242,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3230,13 +3284,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * Returns the next event in the ring buffer, and that event is consumed.
  * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
  */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3257,9 +3315,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
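
A caller that wants the per-read drop count simply passes a variable for the new argument (or NULL to ignore it). A minimal sketch of a consuming read loop against the changed signature; the surrounding variables and the process_event() callback are hypothetical:

	struct ring_buffer_event *event;
	unsigned long lost;
	u64 ts;

	/* drain everything currently readable on this cpu */
	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
		if (lost)
			pr_info("dropped %lu events on cpu %d\n", lost, cpu);
		process_event(event);	/* hypothetical consumer */
	}
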
@@ -3276,23 +3336,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
  *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer. Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
  *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3306,15 +3373,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized. Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
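
The split lets a caller pay for synchronize_sched() once rather than once per cpu. The intended calling sequence, following the kernel-doc comments above, looks roughly like this; the iterator array, its sizing, and the cpu loop are illustrative only:

	struct ring_buffer_iter *iter[NR_CPUS];
	int cpu;

	/* Stage 1: allocate each iterator and disable recording on its cpu buffer */
	for_each_online_cpu(cpu)
		iter[cpu] = ring_buffer_read_prepare(buffer, cpu);

	/* One grace period covers all of the prepare calls above */
	ring_buffer_read_prepare_sync();

	/* Stage 2: now the iterators may actually be started */
	for_each_online_cpu(cpu)
		ring_buffer_read_start(iter[cpu]);
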
@@ -3408,6 +3512,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3683,6 +3790,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	struct buffer_page *reader;
+	unsigned long missed_events;
 	unsigned long flags;
 	unsigned int commit;
 	unsigned int read;
@@ -3719,6 +3827,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	missed_events = cpu_buffer->lost_events;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3779,9 +3890,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
+
+		/*
+		 * Use the real_end for the data size,
+		 * This gives us a chance to store the lost events
+		 * on the page.
+		 */
+		if (reader->real_end)
+			local_set(&bpage->commit, reader->real_end);
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+
+	commit = local_read(&bpage->commit);
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events) {
+		/* If there is room at the end of the page to save the
+		 * missed events, then record it there.
+		 */
+		if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+			memcpy(&bpage->data[commit], &missed_events,
+			       sizeof(missed_events));
+			local_add(RB_MISSED_STORED, &bpage->commit);
+			commit += sizeof(missed_events);
+		}
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+	}
+
+	/*
+	 * This page may be off to user land. Zero it out here.
+	 */
+	if (commit < BUF_PAGE_SIZE)
+		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
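
On the receiving side, whoever is handed a page filled by ring_buffer_read_page() can now tell whether events were dropped, and how many when the count fit after the data. A sketch of that decoding, derived only from the layout the hunks above establish; the helper name is invented:

	static unsigned long rb_sketch_missed_events(struct buffer_data_page *bpage)
	{
		unsigned long commit = local_read(&bpage->commit);
		unsigned long missed = 0;

		if (!(commit & RB_MISSED_EVENTS))
			return 0;	/* nothing was dropped for this page */

		if (commit & RB_MISSED_STORED) {
			/* the count was appended right after the event data */
			unsigned long len = commit &
				~(RB_MISSED_EVENTS | RB_MISSED_STORED);
			memcpy(&missed, &bpage->data[len], sizeof(missed));
		}

		/* missed == 0 here means "events were lost, count unknown" */
		return missed;
	}
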