diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 101 |
1 files changed, 95 insertions, 6 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41ca394feb22..5885cdfc41f3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
319 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 319 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
320 | #define TS_DELTA_TEST (~TS_MASK) | 320 | #define TS_DELTA_TEST (~TS_MASK) |
321 | 321 | ||
322 | /* Flag when events were overwritten */ | ||
323 | #define RB_MISSED_EVENTS (1 << 31) | ||
324 | /* Missed count stored at end */ | ||
325 | #define RB_MISSED_STORED (1 << 30) | ||
326 | |||
322 | struct buffer_data_page { | 327 | struct buffer_data_page { |
323 | u64 time_stamp; /* page time stamp */ | 328 | u64 time_stamp; /* page time stamp */ |
324 | local_t commit; /* write committed index */ | 329 | local_t commit; /* write committed index */ |
@@ -338,6 +343,7 @@ struct buffer_page { | |||
338 | local_t write; /* index for next write */ | 343 | local_t write; /* index for next write */ |
339 | unsigned read; /* index for next read */ | 344 | unsigned read; /* index for next read */ |
340 | local_t entries; /* entries on this page */ | 345 | local_t entries; /* entries on this page */ |
346 | unsigned long real_end; /* real end of data */ | ||
341 | struct buffer_data_page *page; /* Actual data page */ | 347 | struct buffer_data_page *page; /* Actual data page */ |
342 | }; | 348 | }; |
343 | 349 | ||
@@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) | |||
417 | (unsigned int)sizeof(field.commit), | 423 | (unsigned int)sizeof(field.commit), |
418 | (unsigned int)is_signed_type(long)); | 424 | (unsigned int)is_signed_type(long)); |
419 | 425 | ||
426 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" | ||
427 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
428 | (unsigned int)offsetof(typeof(field), commit), | ||
429 | 1, | ||
430 | (unsigned int)is_signed_type(long)); | ||
431 | |||
420 | ret = trace_seq_printf(s, "\tfield: char data;\t" | 432 | ret = trace_seq_printf(s, "\tfield: char data;\t" |
421 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | 433 | "offset:%u;\tsize:%u;\tsigned:%u;\n", |
422 | (unsigned int)offsetof(typeof(field), data), | 434 | (unsigned int)offsetof(typeof(field), data), |
@@ -440,6 +452,8 @@ struct ring_buffer_per_cpu { | |||
440 | struct buffer_page *tail_page; /* write to tail */ | 452 | struct buffer_page *tail_page; /* write to tail */ |
441 | struct buffer_page *commit_page; /* committed pages */ | 453 | struct buffer_page *commit_page; /* committed pages */ |
442 | struct buffer_page *reader_page; | 454 | struct buffer_page *reader_page; |
455 | unsigned long lost_events; | ||
456 | unsigned long last_overrun; | ||
443 | local_t commit_overrun; | 457 | local_t commit_overrun; |
444 | local_t overrun; | 458 | local_t overrun; |
445 | local_t entries; | 459 | local_t entries; |
@@ -1762,6 +1776,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1762 | kmemcheck_annotate_bitfield(event, bitfield); | 1776 | kmemcheck_annotate_bitfield(event, bitfield); |
1763 | 1777 | ||
1764 | /* | 1778 | /* |
1779 | * Save the original length to the meta data. | ||
1780 | * This will be used by the reader to add the lost events | ||
1781 | * counter. | ||
1782 | */ | ||
1783 | tail_page->real_end = tail; | ||
1784 | |||
1785 | /* | ||
1765 | * If this event is bigger than the minimum size, then | 1786 | * If this event is bigger than the minimum size, then |
1766 | * we need to be careful that we don't subtract the | 1787 | * we need to be careful that we don't subtract the |
1767 | * write counter enough to allow another writer to slip | 1788 | * write counter enough to allow another writer to slip |
@@ -2838,6 +2859,7 @@ static struct buffer_page * | |||
2838 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | 2859 | rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) |
2839 | { | 2860 | { |
2840 | struct buffer_page *reader = NULL; | 2861 | struct buffer_page *reader = NULL; |
2862 | unsigned long overwrite; | ||
2841 | unsigned long flags; | 2863 | unsigned long flags; |
2842 | int nr_loops = 0; | 2864 | int nr_loops = 0; |
2843 | int ret; | 2865 | int ret; |
@@ -2879,6 +2901,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2879 | local_set(&cpu_buffer->reader_page->write, 0); | 2901 | local_set(&cpu_buffer->reader_page->write, 0); |
2880 | local_set(&cpu_buffer->reader_page->entries, 0); | 2902 | local_set(&cpu_buffer->reader_page->entries, 0); |
2881 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2903 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
2904 | cpu_buffer->reader_page->real_end = 0; | ||
2882 | 2905 | ||
2883 | spin: | 2906 | spin: |
2884 | /* | 2907 | /* |
@@ -2899,6 +2922,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2899 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); | 2922 | rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); |
2900 | 2923 | ||
2901 | /* | 2924 | /* |
2925 | * We want to make sure we read the overruns after we set up our | ||
2926 | * pointers to the next object. The writer side does a | ||
2927 | * cmpxchg to cross pages which acts as the mb on the writer | ||
2928 | * side. Note, the reader will constantly fail the swap | ||
2929 | * while the writer is updating the pointers, so this | ||
2930 | * guarantees that the overwrite recorded here is the one we | ||
2931 | * want to compare with the last_overrun. | ||
2932 | */ | ||
2933 | smp_mb(); | ||
2934 | overwrite = local_read(&(cpu_buffer->overrun)); | ||
2935 | |||
2936 | /* | ||
2902 | * Here's the tricky part. | 2937 | * Here's the tricky part. |
2903 | * | 2938 | * |
2904 | * We need to move the pointer past the header page. | 2939 | * We need to move the pointer past the header page. |
@@ -2929,6 +2964,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2929 | cpu_buffer->reader_page = reader; | 2964 | cpu_buffer->reader_page = reader; |
2930 | rb_reset_reader_page(cpu_buffer); | 2965 | rb_reset_reader_page(cpu_buffer); |
2931 | 2966 | ||
2967 | if (overwrite != cpu_buffer->last_overrun) { | ||
2968 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; | ||
2969 | cpu_buffer->last_overrun = overwrite; | ||
2970 | } | ||
2971 | |||
2932 | goto again; | 2972 | goto again; |
2933 | 2973 | ||
2934 | out: | 2974 | out: |
@@ -3005,8 +3045,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
3005 | rb_advance_iter(iter); | 3045 | rb_advance_iter(iter); |
3006 | } | 3046 | } |
3007 | 3047 | ||
3048 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) | ||
3049 | { | ||
3050 | return cpu_buffer->lost_events; | ||
3051 | } | ||
3052 | |||
3008 | static struct ring_buffer_event * | 3053 | static struct ring_buffer_event * |
3009 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | 3054 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, |
3055 | unsigned long *lost_events) | ||
3010 | { | 3056 | { |
3011 | struct ring_buffer_event *event; | 3057 | struct ring_buffer_event *event; |
3012 | struct buffer_page *reader; | 3058 | struct buffer_page *reader; |
@@ -3058,6 +3104,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) | |||
3058 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | 3104 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
3059 | cpu_buffer->cpu, ts); | 3105 | cpu_buffer->cpu, ts); |
3060 | } | 3106 | } |
3107 | if (lost_events) | ||
3108 | *lost_events = rb_lost_events(cpu_buffer); | ||
3061 | return event; | 3109 | return event; |
3062 | 3110 | ||
3063 | default: | 3111 | default: |
@@ -3168,12 +3216,14 @@ static inline int rb_ok_to_lock(void) | |||
3168 | * @buffer: The ring buffer to read | 3216 | * @buffer: The ring buffer to read |
3169 | * @cpu: The cpu to peek at | 3217 | * @cpu: The cpu to peek at |
3170 | * @ts: The timestamp counter of this event. | 3218 | * @ts: The timestamp counter of this event. |
3219 | * @lost_events: a variable to store the number of lost events (may be NULL) | ||
3171 | * | 3220 | * |
3172 | * This will return the event that will be read next, but does | 3221 | * This will return the event that will be read next, but does |
3173 | * not consume the data. | 3222 | * not consume the data. |
3174 | */ | 3223 | */ |
3175 | struct ring_buffer_event * | 3224 | struct ring_buffer_event * |
3176 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 3225 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
3226 | unsigned long *lost_events) | ||
3177 | { | 3227 | { |
3178 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 3228 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
3179 | struct ring_buffer_event *event; | 3229 | struct ring_buffer_event *event; |
@@ -3188,7 +3238,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3188 | local_irq_save(flags); | 3238 | local_irq_save(flags); |
3189 | if (dolock) | 3239 | if (dolock) |
3190 | spin_lock(&cpu_buffer->reader_lock); | 3240 | spin_lock(&cpu_buffer->reader_lock); |
3191 | event = rb_buffer_peek(cpu_buffer, ts); | 3241 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3192 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3242 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3193 | rb_advance_reader(cpu_buffer); | 3243 | rb_advance_reader(cpu_buffer); |
3194 | if (dolock) | 3244 | if (dolock) |
@@ -3230,13 +3280,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3230 | /** | 3280 | /** |
3231 | * ring_buffer_consume - return an event and consume it | 3281 | * ring_buffer_consume - return an event and consume it |
3232 | * @buffer: The ring buffer to get the next event from | 3282 | * @buffer: The ring buffer to get the next event from |
3283 | * @cpu: the cpu to read the buffer from | ||
3284 | * @ts: a variable to store the timestamp (may be NULL) | ||
3285 | * @lost_events: a variable to store the number of lost events (may be NULL) | ||
3233 | * | 3286 | * |
3234 | * Returns the next event in the ring buffer, and that event is consumed. | 3287 | * Returns the next event in the ring buffer, and that event is consumed. |
3235 | * Meaning, that sequential reads will keep returning a different event, | 3288 | * Meaning, that sequential reads will keep returning a different event, |
3236 | * and eventually empty the ring buffer if the producer is slower. | 3289 | * and eventually empty the ring buffer if the producer is slower. |
3237 | */ | 3290 | */ |
3238 | struct ring_buffer_event * | 3291 | struct ring_buffer_event * |
3239 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 3292 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, |
3293 | unsigned long *lost_events) | ||
3240 | { | 3294 | { |
3241 | struct ring_buffer_per_cpu *cpu_buffer; | 3295 | struct ring_buffer_per_cpu *cpu_buffer; |
3242 | struct ring_buffer_event *event = NULL; | 3296 | struct ring_buffer_event *event = NULL; |
@@ -3257,9 +3311,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3257 | if (dolock) | 3311 | if (dolock) |
3258 | spin_lock(&cpu_buffer->reader_lock); | 3312 | spin_lock(&cpu_buffer->reader_lock); |
3259 | 3313 | ||
3260 | event = rb_buffer_peek(cpu_buffer, ts); | 3314 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3261 | if (event) | 3315 | if (event) { |
3316 | cpu_buffer->lost_events = 0; | ||
3262 | rb_advance_reader(cpu_buffer); | 3317 | rb_advance_reader(cpu_buffer); |
3318 | } | ||
3263 | 3319 | ||
3264 | if (dolock) | 3320 | if (dolock) |
3265 | spin_unlock(&cpu_buffer->reader_lock); | 3321 | spin_unlock(&cpu_buffer->reader_lock); |
@@ -3408,6 +3464,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
3408 | cpu_buffer->write_stamp = 0; | 3464 | cpu_buffer->write_stamp = 0; |
3409 | cpu_buffer->read_stamp = 0; | 3465 | cpu_buffer->read_stamp = 0; |
3410 | 3466 | ||
3467 | cpu_buffer->lost_events = 0; | ||
3468 | cpu_buffer->last_overrun = 0; | ||
3469 | |||
3411 | rb_head_page_activate(cpu_buffer); | 3470 | rb_head_page_activate(cpu_buffer); |
3412 | } | 3471 | } |
3413 | 3472 | ||
@@ -3683,6 +3742,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3683 | struct ring_buffer_event *event; | 3742 | struct ring_buffer_event *event; |
3684 | struct buffer_data_page *bpage; | 3743 | struct buffer_data_page *bpage; |
3685 | struct buffer_page *reader; | 3744 | struct buffer_page *reader; |
3745 | unsigned long missed_events; | ||
3686 | unsigned long flags; | 3746 | unsigned long flags; |
3687 | unsigned int commit; | 3747 | unsigned int commit; |
3688 | unsigned int read; | 3748 | unsigned int read; |
@@ -3719,6 +3779,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3719 | read = reader->read; | 3779 | read = reader->read; |
3720 | commit = rb_page_commit(reader); | 3780 | commit = rb_page_commit(reader); |
3721 | 3781 | ||
3782 | /* Check if any events were dropped */ | ||
3783 | missed_events = cpu_buffer->lost_events; | ||
3784 | |||
3722 | /* | 3785 | /* |
3723 | * If this page has been partially read or | 3786 | * If this page has been partially read or |
3724 | * if len is not big enough to read the rest of the page or | 3787 | * if len is not big enough to read the rest of the page or |
@@ -3779,9 +3842,35 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3779 | local_set(&reader->entries, 0); | 3842 | local_set(&reader->entries, 0); |
3780 | reader->read = 0; | 3843 | reader->read = 0; |
3781 | *data_page = bpage; | 3844 | *data_page = bpage; |
3845 | |||
3846 | /* | ||
3847 | * Use the real_end for the data size. | ||
3848 | * This gives us a chance to store the lost events | ||
3849 | * on the page. | ||
3850 | */ | ||
3851 | if (reader->real_end) | ||
3852 | local_set(&bpage->commit, reader->real_end); | ||
3782 | } | 3853 | } |
3783 | ret = read; | 3854 | ret = read; |
3784 | 3855 | ||
3856 | cpu_buffer->lost_events = 0; | ||
3857 | /* | ||
3858 | * Set a flag in the commit field if we lost events | ||
3859 | */ | ||
3860 | if (missed_events) { | ||
3861 | commit = local_read(&bpage->commit); | ||
3862 | |||
3863 | /* If there is room at the end of the page to save the | ||
3864 | * missed events, then record it there. | ||
3865 | */ | ||
3866 | if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { | ||
3867 | memcpy(&bpage->data[commit], &missed_events, | ||
3868 | sizeof(missed_events)); | ||
3869 | local_add(RB_MISSED_STORED, &bpage->commit); | ||
3870 | } | ||
3871 | local_add(RB_MISSED_EVENTS, &bpage->commit); | ||
3872 | } | ||
3873 | |||
3785 | out_unlock: | 3874 | out_unlock: |
3786 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3875 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3787 | 3876 | ||