diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
| -rw-r--r-- | kernel/trace/ring_buffer.c | 777 |
1 files changed, 506 insertions, 271 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 960cbf44c844..2e642b2b7253 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -22,6 +22,28 @@ | |||
| 22 | #include "trace.h" | 22 | #include "trace.h" |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * The ring buffer header is special. We must manually up keep it. | ||
| 26 | */ | ||
| 27 | int ring_buffer_print_entry_header(struct trace_seq *s) | ||
| 28 | { | ||
| 29 | int ret; | ||
| 30 | |||
| 31 | ret = trace_seq_printf(s, "# compressed entry header\n"); | ||
| 32 | ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); | ||
| 33 | ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); | ||
| 34 | ret = trace_seq_printf(s, "\tarray : 32 bits\n"); | ||
| 35 | ret = trace_seq_printf(s, "\n"); | ||
| 36 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", | ||
| 37 | RINGBUF_TYPE_PADDING); | ||
| 38 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", | ||
| 39 | RINGBUF_TYPE_TIME_EXTEND); | ||
| 40 | ret = trace_seq_printf(s, "\tdata max type_len == %d\n", | ||
| 41 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | ||
| 42 | |||
| 43 | return ret; | ||
| 44 | } | ||
| 45 | |||
| 46 | /* | ||
| 25 | * The ring buffer is made up of a list of pages. A separate list of pages is | 47 | * The ring buffer is made up of a list of pages. A separate list of pages is |
| 26 | * allocated for each CPU. A writer may only write to a buffer that is | 48 | * allocated for each CPU. A writer may only write to a buffer that is |
| 27 | * associated with the CPU it is currently executing on. A reader may read | 49 | * associated with the CPU it is currently executing on. A reader may read |
| @@ -182,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on); | |||
| 182 | 204 | ||
| 183 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) | 205 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
| 184 | #define RB_ALIGNMENT 4U | 206 | #define RB_ALIGNMENT 4U |
| 185 | #define RB_MAX_SMALL_DATA 28 | 207 | #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
| 208 | |||
| 209 | /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ | ||
| 210 | #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX | ||
| 186 | 211 | ||
| 187 | enum { | 212 | enum { |
| 188 | RB_LEN_TIME_EXTEND = 8, | 213 | RB_LEN_TIME_EXTEND = 8, |
| @@ -191,48 +216,28 @@ enum { | |||
| 191 | 216 | ||
| 192 | static inline int rb_null_event(struct ring_buffer_event *event) | 217 | static inline int rb_null_event(struct ring_buffer_event *event) |
| 193 | { | 218 | { |
| 194 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; | 219 | return event->type_len == RINGBUF_TYPE_PADDING |
| 220 | && event->time_delta == 0; | ||
| 195 | } | 221 | } |
| 196 | 222 | ||
| 197 | static inline int rb_discarded_event(struct ring_buffer_event *event) | 223 | static inline int rb_discarded_event(struct ring_buffer_event *event) |
| 198 | { | 224 | { |
| 199 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta; | 225 | return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; |
| 200 | } | 226 | } |
| 201 | 227 | ||
| 202 | static void rb_event_set_padding(struct ring_buffer_event *event) | 228 | static void rb_event_set_padding(struct ring_buffer_event *event) |
| 203 | { | 229 | { |
| 204 | event->type = RINGBUF_TYPE_PADDING; | 230 | event->type_len = RINGBUF_TYPE_PADDING; |
| 205 | event->time_delta = 0; | 231 | event->time_delta = 0; |
| 206 | } | 232 | } |
| 207 | 233 | ||
| 208 | /** | ||
| 209 | * ring_buffer_event_discard - discard an event in the ring buffer | ||
| 210 | * @buffer: the ring buffer | ||
| 211 | * @event: the event to discard | ||
| 212 | * | ||
| 213 | * Sometimes a event that is in the ring buffer needs to be ignored. | ||
| 214 | * This function lets the user discard an event in the ring buffer | ||
| 215 | * and then that event will not be read later. | ||
| 216 | * | ||
| 217 | * Note, it is up to the user to be careful with this, and protect | ||
| 218 | * against races. If the user discards an event that has been consumed | ||
| 219 | * it is possible that it could corrupt the ring buffer. | ||
| 220 | */ | ||
| 221 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
| 222 | { | ||
| 223 | event->type = RINGBUF_TYPE_PADDING; | ||
| 224 | /* time delta must be non zero */ | ||
| 225 | if (!event->time_delta) | ||
| 226 | event->time_delta = 1; | ||
| 227 | } | ||
| 228 | |||
| 229 | static unsigned | 234 | static unsigned |
| 230 | rb_event_data_length(struct ring_buffer_event *event) | 235 | rb_event_data_length(struct ring_buffer_event *event) |
| 231 | { | 236 | { |
| 232 | unsigned length; | 237 | unsigned length; |
| 233 | 238 | ||
| 234 | if (event->len) | 239 | if (event->type_len) |
| 235 | length = event->len * RB_ALIGNMENT; | 240 | length = event->type_len * RB_ALIGNMENT; |
| 236 | else | 241 | else |
| 237 | length = event->array[0]; | 242 | length = event->array[0]; |
| 238 | return length + RB_EVNT_HDR_SIZE; | 243 | return length + RB_EVNT_HDR_SIZE; |
| @@ -242,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
| 242 | static unsigned | 247 | static unsigned |
| 243 | rb_event_length(struct ring_buffer_event *event) | 248 | rb_event_length(struct ring_buffer_event *event) |
| 244 | { | 249 | { |
| 245 | switch (event->type) { | 250 | switch (event->type_len) { |
| 246 | case RINGBUF_TYPE_PADDING: | 251 | case RINGBUF_TYPE_PADDING: |
| 247 | if (rb_null_event(event)) | 252 | if (rb_null_event(event)) |
| 248 | /* undefined */ | 253 | /* undefined */ |
| 249 | return -1; | 254 | return -1; |
| 250 | return rb_event_data_length(event); | 255 | return event->array[0] + RB_EVNT_HDR_SIZE; |
| 251 | 256 | ||
| 252 | case RINGBUF_TYPE_TIME_EXTEND: | 257 | case RINGBUF_TYPE_TIME_EXTEND: |
| 253 | return RB_LEN_TIME_EXTEND; | 258 | return RB_LEN_TIME_EXTEND; |
| @@ -271,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
| 271 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 276 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
| 272 | { | 277 | { |
| 273 | unsigned length = rb_event_length(event); | 278 | unsigned length = rb_event_length(event); |
| 274 | if (event->type != RINGBUF_TYPE_DATA) | 279 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
| 275 | return length; | 280 | return length; |
| 276 | length -= RB_EVNT_HDR_SIZE; | 281 | length -= RB_EVNT_HDR_SIZE; |
| 277 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) | 282 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) |
| @@ -284,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
| 284 | static void * | 289 | static void * |
| 285 | rb_event_data(struct ring_buffer_event *event) | 290 | rb_event_data(struct ring_buffer_event *event) |
| 286 | { | 291 | { |
| 287 | BUG_ON(event->type != RINGBUF_TYPE_DATA); | 292 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
| 288 | /* If length is in len field, then array[0] has the data */ | 293 | /* If length is in len field, then array[0] has the data */ |
| 289 | if (event->len) | 294 | if (event->type_len) |
| 290 | return (void *)&event->array[0]; | 295 | return (void *)&event->array[0]; |
| 291 | /* Otherwise length is in array[0] and array[1] has the data */ | 296 | /* Otherwise length is in array[0] and array[1] has the data */ |
| 292 | return (void *)&event->array[1]; | 297 | return (void *)&event->array[1]; |
| @@ -316,9 +321,10 @@ struct buffer_data_page { | |||
| 316 | }; | 321 | }; |
| 317 | 322 | ||
| 318 | struct buffer_page { | 323 | struct buffer_page { |
| 324 | struct list_head list; /* list of buffer pages */ | ||
| 319 | local_t write; /* index for next write */ | 325 | local_t write; /* index for next write */ |
| 320 | unsigned read; /* index for next read */ | 326 | unsigned read; /* index for next read */ |
| 321 | struct list_head list; /* list of free pages */ | 327 | local_t entries; /* entries on this page */ |
| 322 | struct buffer_data_page *page; /* Actual data page */ | 328 | struct buffer_data_page *page; /* Actual data page */ |
| 323 | }; | 329 | }; |
| 324 | 330 | ||
| @@ -361,6 +367,34 @@ static inline int test_time_stamp(u64 delta) | |||
| 361 | 367 | ||
| 362 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) | 368 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) |
| 363 | 369 | ||
| 370 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | ||
| 371 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | ||
| 372 | |||
| 373 | /* Max number of timestamps that can fit on a page */ | ||
| 374 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP) | ||
| 375 | |||
| 376 | int ring_buffer_print_page_header(struct trace_seq *s) | ||
| 377 | { | ||
| 378 | struct buffer_data_page field; | ||
| 379 | int ret; | ||
| 380 | |||
| 381 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" | ||
| 382 | "offset:0;\tsize:%u;\n", | ||
| 383 | (unsigned int)sizeof(field.time_stamp)); | ||
| 384 | |||
| 385 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" | ||
| 386 | "offset:%u;\tsize:%u;\n", | ||
| 387 | (unsigned int)offsetof(typeof(field), commit), | ||
| 388 | (unsigned int)sizeof(field.commit)); | ||
| 389 | |||
| 390 | ret = trace_seq_printf(s, "\tfield: char data;\t" | ||
| 391 | "offset:%u;\tsize:%u;\n", | ||
| 392 | (unsigned int)offsetof(typeof(field), data), | ||
| 393 | (unsigned int)BUF_PAGE_SIZE); | ||
| 394 | |||
| 395 | return ret; | ||
| 396 | } | ||
| 397 | |||
| 364 | /* | 398 | /* |
| 365 | * head_page == tail_page && head == tail then buffer is empty. | 399 | * head_page == tail_page && head == tail then buffer is empty. |
| 366 | */ | 400 | */ |
| @@ -375,8 +409,11 @@ struct ring_buffer_per_cpu { | |||
| 375 | struct buffer_page *tail_page; /* write to tail */ | 409 | struct buffer_page *tail_page; /* write to tail */ |
| 376 | struct buffer_page *commit_page; /* committed pages */ | 410 | struct buffer_page *commit_page; /* committed pages */ |
| 377 | struct buffer_page *reader_page; | 411 | struct buffer_page *reader_page; |
| 412 | unsigned long nmi_dropped; | ||
| 413 | unsigned long commit_overrun; | ||
| 378 | unsigned long overrun; | 414 | unsigned long overrun; |
| 379 | unsigned long entries; | 415 | unsigned long read; |
| 416 | local_t entries; | ||
| 380 | u64 write_stamp; | 417 | u64 write_stamp; |
| 381 | u64 read_stamp; | 418 | u64 read_stamp; |
| 382 | atomic_t record_disabled; | 419 | atomic_t record_disabled; |
| @@ -389,6 +426,8 @@ struct ring_buffer { | |||
| 389 | atomic_t record_disabled; | 426 | atomic_t record_disabled; |
| 390 | cpumask_var_t cpumask; | 427 | cpumask_var_t cpumask; |
| 391 | 428 | ||
| 429 | struct lock_class_key *reader_lock_key; | ||
| 430 | |||
| 392 | struct mutex mutex; | 431 | struct mutex mutex; |
| 393 | 432 | ||
| 394 | struct ring_buffer_per_cpu **buffers; | 433 | struct ring_buffer_per_cpu **buffers; |
| @@ -420,13 +459,18 @@ struct ring_buffer_iter { | |||
| 420 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 459 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
| 421 | #define DEBUG_SHIFT 0 | 460 | #define DEBUG_SHIFT 0 |
| 422 | 461 | ||
| 462 | static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) | ||
| 463 | { | ||
| 464 | /* shift to debug/test normalization and TIME_EXTENTS */ | ||
| 465 | return buffer->clock() << DEBUG_SHIFT; | ||
| 466 | } | ||
| 467 | |||
| 423 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | 468 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) |
| 424 | { | 469 | { |
| 425 | u64 time; | 470 | u64 time; |
| 426 | 471 | ||
| 427 | preempt_disable_notrace(); | 472 | preempt_disable_notrace(); |
| 428 | /* shift to debug/test normalization and TIME_EXTENTS */ | 473 | time = rb_time_stamp(buffer, cpu); |
| 429 | time = buffer->clock() << DEBUG_SHIFT; | ||
| 430 | preempt_enable_no_resched_notrace(); | 474 | preempt_enable_no_resched_notrace(); |
| 431 | 475 | ||
| 432 | return time; | 476 | return time; |
| @@ -523,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
| 523 | cpu_buffer->cpu = cpu; | 567 | cpu_buffer->cpu = cpu; |
| 524 | cpu_buffer->buffer = buffer; | 568 | cpu_buffer->buffer = buffer; |
| 525 | spin_lock_init(&cpu_buffer->reader_lock); | 569 | spin_lock_init(&cpu_buffer->reader_lock); |
| 570 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | ||
| 526 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 571 | cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
| 527 | INIT_LIST_HEAD(&cpu_buffer->pages); | 572 | INIT_LIST_HEAD(&cpu_buffer->pages); |
| 528 | 573 | ||
| @@ -593,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
| 593 | * when the buffer wraps. If this flag is not set, the buffer will | 638 | * when the buffer wraps. If this flag is not set, the buffer will |
| 594 | * drop data when the tail hits the head. | 639 | * drop data when the tail hits the head. |
| 595 | */ | 640 | */ |
| 596 | struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | 641 | struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, |
| 642 | struct lock_class_key *key) | ||
| 597 | { | 643 | { |
| 598 | struct ring_buffer *buffer; | 644 | struct ring_buffer *buffer; |
| 599 | int bsize; | 645 | int bsize; |
| @@ -616,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
| 616 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 662 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
| 617 | buffer->flags = flags; | 663 | buffer->flags = flags; |
| 618 | buffer->clock = trace_clock_local; | 664 | buffer->clock = trace_clock_local; |
| 665 | buffer->reader_lock_key = key; | ||
| 619 | 666 | ||
| 620 | /* need at least two pages */ | 667 | /* need at least two pages */ |
| 621 | if (buffer->pages == 1) | 668 | if (buffer->pages == 1) |
| @@ -673,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
| 673 | kfree(buffer); | 720 | kfree(buffer); |
| 674 | return NULL; | 721 | return NULL; |
| 675 | } | 722 | } |
| 676 | EXPORT_SYMBOL_GPL(ring_buffer_alloc); | 723 | EXPORT_SYMBOL_GPL(__ring_buffer_alloc); |
| 677 | 724 | ||
| 678 | /** | 725 | /** |
| 679 | * ring_buffer_free - free a ring buffer. | 726 | * ring_buffer_free - free a ring buffer. |
| @@ -947,31 +994,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 947 | return rb_page_commit(cpu_buffer->head_page); | 994 | return rb_page_commit(cpu_buffer->head_page); |
| 948 | } | 995 | } |
| 949 | 996 | ||
| 950 | /* | ||
| 951 | * When the tail hits the head and the buffer is in overwrite mode, | ||
| 952 | * the head jumps to the next page and all content on the previous | ||
| 953 | * page is discarded. But before doing so, we update the overrun | ||
| 954 | * variable of the buffer. | ||
| 955 | */ | ||
| 956 | static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) | ||
| 957 | { | ||
| 958 | struct ring_buffer_event *event; | ||
| 959 | unsigned long head; | ||
| 960 | |||
| 961 | for (head = 0; head < rb_head_size(cpu_buffer); | ||
| 962 | head += rb_event_length(event)) { | ||
| 963 | |||
| 964 | event = __rb_page_index(cpu_buffer->head_page, head); | ||
| 965 | if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) | ||
| 966 | return; | ||
| 967 | /* Only count data entries */ | ||
| 968 | if (event->type != RINGBUF_TYPE_DATA) | ||
| 969 | continue; | ||
| 970 | cpu_buffer->overrun++; | ||
| 971 | cpu_buffer->entries--; | ||
| 972 | } | ||
| 973 | } | ||
| 974 | |||
| 975 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, | 997 | static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, |
| 976 | struct buffer_page **bpage) | 998 | struct buffer_page **bpage) |
| 977 | { | 999 | { |
| @@ -991,7 +1013,7 @@ rb_event_index(struct ring_buffer_event *event) | |||
| 991 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 1013 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); |
| 992 | } | 1014 | } |
| 993 | 1015 | ||
| 994 | static int | 1016 | static inline int |
| 995 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1017 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
| 996 | struct ring_buffer_event *event) | 1018 | struct ring_buffer_event *event) |
| 997 | { | 1019 | { |
| @@ -1110,28 +1132,21 @@ static void | |||
| 1110 | rb_update_event(struct ring_buffer_event *event, | 1132 | rb_update_event(struct ring_buffer_event *event, |
| 1111 | unsigned type, unsigned length) | 1133 | unsigned type, unsigned length) |
| 1112 | { | 1134 | { |
| 1113 | event->type = type; | 1135 | event->type_len = type; |
| 1114 | 1136 | ||
| 1115 | switch (type) { | 1137 | switch (type) { |
| 1116 | 1138 | ||
| 1117 | case RINGBUF_TYPE_PADDING: | 1139 | case RINGBUF_TYPE_PADDING: |
| 1118 | break; | ||
| 1119 | |||
| 1120 | case RINGBUF_TYPE_TIME_EXTEND: | 1140 | case RINGBUF_TYPE_TIME_EXTEND: |
| 1121 | event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); | ||
| 1122 | break; | ||
| 1123 | |||
| 1124 | case RINGBUF_TYPE_TIME_STAMP: | 1141 | case RINGBUF_TYPE_TIME_STAMP: |
| 1125 | event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); | ||
| 1126 | break; | 1142 | break; |
| 1127 | 1143 | ||
| 1128 | case RINGBUF_TYPE_DATA: | 1144 | case 0: |
| 1129 | length -= RB_EVNT_HDR_SIZE; | 1145 | length -= RB_EVNT_HDR_SIZE; |
| 1130 | if (length > RB_MAX_SMALL_DATA) { | 1146 | if (length > RB_MAX_SMALL_DATA) |
| 1131 | event->len = 0; | ||
| 1132 | event->array[0] = length; | 1147 | event->array[0] = length; |
| 1133 | } else | 1148 | else |
| 1134 | event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1149 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); |
| 1135 | break; | 1150 | break; |
| 1136 | default: | 1151 | default: |
| 1137 | BUG(); | 1152 | BUG(); |
| @@ -1155,131 +1170,156 @@ static unsigned rb_calculate_event_length(unsigned length) | |||
| 1155 | return length; | 1170 | return length; |
| 1156 | } | 1171 | } |
| 1157 | 1172 | ||
| 1173 | |||
| 1158 | static struct ring_buffer_event * | 1174 | static struct ring_buffer_event * |
| 1159 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1175 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
| 1160 | unsigned type, unsigned long length, u64 *ts) | 1176 | unsigned long length, unsigned long tail, |
| 1177 | struct buffer_page *commit_page, | ||
| 1178 | struct buffer_page *tail_page, u64 *ts) | ||
| 1161 | { | 1179 | { |
| 1162 | struct buffer_page *tail_page, *head_page, *reader_page, *commit_page; | 1180 | struct buffer_page *next_page, *head_page, *reader_page; |
| 1163 | unsigned long tail, write; | ||
| 1164 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1181 | struct ring_buffer *buffer = cpu_buffer->buffer; |
| 1165 | struct ring_buffer_event *event; | 1182 | struct ring_buffer_event *event; |
| 1166 | unsigned long flags; | ||
| 1167 | bool lock_taken = false; | 1183 | bool lock_taken = false; |
| 1184 | unsigned long flags; | ||
| 1168 | 1185 | ||
| 1169 | commit_page = cpu_buffer->commit_page; | 1186 | next_page = tail_page; |
| 1170 | /* we just need to protect against interrupts */ | ||
| 1171 | barrier(); | ||
| 1172 | tail_page = cpu_buffer->tail_page; | ||
| 1173 | write = local_add_return(length, &tail_page->write); | ||
| 1174 | tail = write - length; | ||
| 1175 | 1187 | ||
| 1176 | /* See if we shot pass the end of this buffer page */ | 1188 | local_irq_save(flags); |
| 1177 | if (write > BUF_PAGE_SIZE) { | 1189 | /* |
| 1178 | struct buffer_page *next_page = tail_page; | 1190 | * Since the write to the buffer is still not |
| 1191 | * fully lockless, we must be careful with NMIs. | ||
| 1192 | * The locks in the writers are taken when a write | ||
| 1193 | * crosses to a new page. The locks protect against | ||
| 1194 | * races with the readers (this will soon be fixed | ||
| 1195 | * with a lockless solution). | ||
| 1196 | * | ||
| 1197 | * Because we can not protect against NMIs, and we | ||
| 1198 | * want to keep traces reentrant, we need to manage | ||
| 1199 | * what happens when we are in an NMI. | ||
| 1200 | * | ||
| 1201 | * NMIs can happen after we take the lock. | ||
| 1202 | * If we are in an NMI, only take the lock | ||
| 1203 | * if it is not already taken. Otherwise | ||
| 1204 | * simply fail. | ||
| 1205 | */ | ||
| 1206 | if (unlikely(in_nmi())) { | ||
| 1207 | if (!__raw_spin_trylock(&cpu_buffer->lock)) { | ||
| 1208 | cpu_buffer->nmi_dropped++; | ||
| 1209 | goto out_reset; | ||
| 1210 | } | ||
| 1211 | } else | ||
| 1212 | __raw_spin_lock(&cpu_buffer->lock); | ||
| 1179 | 1213 | ||
| 1180 | local_irq_save(flags); | 1214 | lock_taken = true; |
| 1181 | /* | ||
| 1182 | * Since the write to the buffer is still not | ||
| 1183 | * fully lockless, we must be careful with NMIs. | ||
| 1184 | * The locks in the writers are taken when a write | ||
| 1185 | * crosses to a new page. The locks protect against | ||
| 1186 | * races with the readers (this will soon be fixed | ||
| 1187 | * with a lockless solution). | ||
| 1188 | * | ||
| 1189 | * Because we can not protect against NMIs, and we | ||
| 1190 | * want to keep traces reentrant, we need to manage | ||
| 1191 | * what happens when we are in an NMI. | ||
| 1192 | * | ||
| 1193 | * NMIs can happen after we take the lock. | ||
| 1194 | * If we are in an NMI, only take the lock | ||
| 1195 | * if it is not already taken. Otherwise | ||
| 1196 | * simply fail. | ||
| 1197 | */ | ||
| 1198 | if (unlikely(in_nmi())) { | ||
| 1199 | if (!__raw_spin_trylock(&cpu_buffer->lock)) | ||
| 1200 | goto out_reset; | ||
| 1201 | } else | ||
| 1202 | __raw_spin_lock(&cpu_buffer->lock); | ||
| 1203 | 1215 | ||
| 1204 | lock_taken = true; | 1216 | rb_inc_page(cpu_buffer, &next_page); |
| 1205 | 1217 | ||
| 1206 | rb_inc_page(cpu_buffer, &next_page); | 1218 | head_page = cpu_buffer->head_page; |
| 1219 | reader_page = cpu_buffer->reader_page; | ||
| 1207 | 1220 | ||
| 1208 | head_page = cpu_buffer->head_page; | 1221 | /* we grabbed the lock before incrementing */ |
| 1209 | reader_page = cpu_buffer->reader_page; | 1222 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) |
| 1223 | goto out_reset; | ||
| 1210 | 1224 | ||
| 1211 | /* we grabbed the lock before incrementing */ | 1225 | /* |
| 1212 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | 1226 | * If for some reason, we had an interrupt storm that made |
| 1213 | goto out_reset; | 1227 | * it all the way around the buffer, bail, and warn |
| 1228 | * about it. | ||
| 1229 | */ | ||
| 1230 | if (unlikely(next_page == commit_page)) { | ||
| 1231 | cpu_buffer->commit_overrun++; | ||
| 1232 | goto out_reset; | ||
| 1233 | } | ||
| 1214 | 1234 | ||
| 1215 | /* | 1235 | if (next_page == head_page) { |
| 1216 | * If for some reason, we had an interrupt storm that made | 1236 | if (!(buffer->flags & RB_FL_OVERWRITE)) |
| 1217 | * it all the way around the buffer, bail, and warn | ||
| 1218 | * about it. | ||
| 1219 | */ | ||
| 1220 | if (unlikely(next_page == commit_page)) { | ||
| 1221 | WARN_ON_ONCE(1); | ||
| 1222 | goto out_reset; | 1237 | goto out_reset; |
| 1223 | } | ||
| 1224 | 1238 | ||
| 1225 | if (next_page == head_page) { | 1239 | /* tail_page has not moved yet? */ |
| 1226 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1240 | if (tail_page == cpu_buffer->tail_page) { |
| 1227 | goto out_reset; | 1241 | /* count overflows */ |
| 1228 | 1242 | cpu_buffer->overrun += | |
| 1229 | /* tail_page has not moved yet? */ | 1243 | local_read(&head_page->entries); |
| 1230 | if (tail_page == cpu_buffer->tail_page) { | ||
| 1231 | /* count overflows */ | ||
| 1232 | rb_update_overflow(cpu_buffer); | ||
| 1233 | 1244 | ||
| 1234 | rb_inc_page(cpu_buffer, &head_page); | 1245 | rb_inc_page(cpu_buffer, &head_page); |
| 1235 | cpu_buffer->head_page = head_page; | 1246 | cpu_buffer->head_page = head_page; |
| 1236 | cpu_buffer->head_page->read = 0; | 1247 | cpu_buffer->head_page->read = 0; |
| 1237 | } | ||
| 1238 | } | 1248 | } |
| 1249 | } | ||
| 1239 | 1250 | ||
| 1240 | /* | 1251 | /* |
| 1241 | * If the tail page is still the same as what we think | 1252 | * If the tail page is still the same as what we think |
| 1242 | * it is, then it is up to us to update the tail | 1253 | * it is, then it is up to us to update the tail |
| 1243 | * pointer. | 1254 | * pointer. |
| 1244 | */ | 1255 | */ |
| 1245 | if (tail_page == cpu_buffer->tail_page) { | 1256 | if (tail_page == cpu_buffer->tail_page) { |
| 1246 | local_set(&next_page->write, 0); | 1257 | local_set(&next_page->write, 0); |
| 1247 | local_set(&next_page->page->commit, 0); | 1258 | local_set(&next_page->entries, 0); |
| 1248 | cpu_buffer->tail_page = next_page; | 1259 | local_set(&next_page->page->commit, 0); |
| 1260 | cpu_buffer->tail_page = next_page; | ||
| 1261 | |||
| 1262 | /* reread the time stamp */ | ||
| 1263 | *ts = rb_time_stamp(buffer, cpu_buffer->cpu); | ||
| 1264 | cpu_buffer->tail_page->page->time_stamp = *ts; | ||
| 1265 | } | ||
| 1249 | 1266 | ||
| 1250 | /* reread the time stamp */ | 1267 | /* |
| 1251 | *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); | 1268 | * The actual tail page has moved forward. |
| 1252 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1269 | */ |
| 1253 | } | 1270 | if (tail < BUF_PAGE_SIZE) { |
| 1271 | /* Mark the rest of the page with padding */ | ||
| 1272 | event = __rb_page_index(tail_page, tail); | ||
| 1273 | rb_event_set_padding(event); | ||
| 1274 | } | ||
| 1254 | 1275 | ||
| 1255 | /* | 1276 | /* Set the write back to the previous setting */ |
| 1256 | * The actual tail page has moved forward. | 1277 | local_sub(length, &tail_page->write); |
| 1257 | */ | ||
| 1258 | if (tail < BUF_PAGE_SIZE) { | ||
| 1259 | /* Mark the rest of the page with padding */ | ||
| 1260 | event = __rb_page_index(tail_page, tail); | ||
| 1261 | rb_event_set_padding(event); | ||
| 1262 | } | ||
| 1263 | 1278 | ||
| 1264 | if (tail <= BUF_PAGE_SIZE) | 1279 | /* |
| 1265 | /* Set the write back to the previous setting */ | 1280 | * If this was a commit entry that failed, |
| 1266 | local_set(&tail_page->write, tail); | 1281 | * increment that too |
| 1282 | */ | ||
| 1283 | if (tail_page == cpu_buffer->commit_page && | ||
| 1284 | tail == rb_commit_index(cpu_buffer)) { | ||
| 1285 | rb_set_commit_to_write(cpu_buffer); | ||
| 1286 | } | ||
| 1267 | 1287 | ||
| 1268 | /* | 1288 | __raw_spin_unlock(&cpu_buffer->lock); |
| 1269 | * If this was a commit entry that failed, | 1289 | local_irq_restore(flags); |
| 1270 | * increment that too | 1290 | |
| 1271 | */ | 1291 | /* fail and let the caller try again */ |
| 1272 | if (tail_page == cpu_buffer->commit_page && | 1292 | return ERR_PTR(-EAGAIN); |
| 1273 | tail == rb_commit_index(cpu_buffer)) { | 1293 | |
| 1274 | rb_set_commit_to_write(cpu_buffer); | 1294 | out_reset: |
| 1275 | } | 1295 | /* reset write */ |
| 1296 | local_sub(length, &tail_page->write); | ||
| 1276 | 1297 | ||
| 1298 | if (likely(lock_taken)) | ||
| 1277 | __raw_spin_unlock(&cpu_buffer->lock); | 1299 | __raw_spin_unlock(&cpu_buffer->lock); |
| 1278 | local_irq_restore(flags); | 1300 | local_irq_restore(flags); |
| 1301 | return NULL; | ||
| 1302 | } | ||
| 1279 | 1303 | ||
| 1280 | /* fail and let the caller try again */ | 1304 | static struct ring_buffer_event * |
| 1281 | return ERR_PTR(-EAGAIN); | 1305 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
| 1282 | } | 1306 | unsigned type, unsigned long length, u64 *ts) |
| 1307 | { | ||
| 1308 | struct buffer_page *tail_page, *commit_page; | ||
| 1309 | struct ring_buffer_event *event; | ||
| 1310 | unsigned long tail, write; | ||
| 1311 | |||
| 1312 | commit_page = cpu_buffer->commit_page; | ||
| 1313 | /* we just need to protect against interrupts */ | ||
| 1314 | barrier(); | ||
| 1315 | tail_page = cpu_buffer->tail_page; | ||
| 1316 | write = local_add_return(length, &tail_page->write); | ||
| 1317 | tail = write - length; | ||
| 1318 | |||
| 1319 | /* See if we shot pass the end of this buffer page */ | ||
| 1320 | if (write > BUF_PAGE_SIZE) | ||
| 1321 | return rb_move_tail(cpu_buffer, length, tail, | ||
| 1322 | commit_page, tail_page, ts); | ||
| 1283 | 1323 | ||
| 1284 | /* We reserved something on the buffer */ | 1324 | /* We reserved something on the buffer */ |
| 1285 | 1325 | ||
| @@ -1289,6 +1329,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1289 | event = __rb_page_index(tail_page, tail); | 1329 | event = __rb_page_index(tail_page, tail); |
| 1290 | rb_update_event(event, type, length); | 1330 | rb_update_event(event, type, length); |
| 1291 | 1331 | ||
| 1332 | /* The passed in type is zero for DATA */ | ||
| 1333 | if (likely(!type)) | ||
| 1334 | local_inc(&tail_page->entries); | ||
| 1335 | |||
| 1292 | /* | 1336 | /* |
| 1293 | * If this is a commit and the tail is zero, then update | 1337 | * If this is a commit and the tail is zero, then update |
| 1294 | * this page's time stamp. | 1338 | * this page's time stamp. |
| @@ -1297,16 +1341,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1297 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1341 | cpu_buffer->commit_page->page->time_stamp = *ts; |
| 1298 | 1342 | ||
| 1299 | return event; | 1343 | return event; |
| 1344 | } | ||
| 1300 | 1345 | ||
| 1301 | out_reset: | 1346 | static inline int |
| 1302 | /* reset write */ | 1347 | rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, |
| 1303 | if (tail <= BUF_PAGE_SIZE) | 1348 | struct ring_buffer_event *event) |
| 1304 | local_set(&tail_page->write, tail); | 1349 | { |
| 1350 | unsigned long new_index, old_index; | ||
| 1351 | struct buffer_page *bpage; | ||
| 1352 | unsigned long index; | ||
| 1353 | unsigned long addr; | ||
| 1305 | 1354 | ||
| 1306 | if (likely(lock_taken)) | 1355 | new_index = rb_event_index(event); |
| 1307 | __raw_spin_unlock(&cpu_buffer->lock); | 1356 | old_index = new_index + rb_event_length(event); |
| 1308 | local_irq_restore(flags); | 1357 | addr = (unsigned long)event; |
| 1309 | return NULL; | 1358 | addr &= PAGE_MASK; |
| 1359 | |||
| 1360 | bpage = cpu_buffer->tail_page; | ||
| 1361 | |||
| 1362 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | ||
| 1363 | /* | ||
| 1364 | * This is on the tail page. It is possible that | ||
| 1365 | * a write could come in and move the tail page | ||
| 1366 | * and write to the next page. That is fine | ||
| 1367 | * because we just shorten what is on this page. | ||
| 1368 | */ | ||
| 1369 | index = local_cmpxchg(&bpage->write, old_index, new_index); | ||
| 1370 | if (index == old_index) | ||
| 1371 | return 1; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | /* could not discard */ | ||
| 1375 | return 0; | ||
| 1310 | } | 1376 | } |
| 1311 | 1377 | ||
| 1312 | static int | 1378 | static int |
| @@ -1351,16 +1417,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1351 | event->array[0] = *delta >> TS_SHIFT; | 1417 | event->array[0] = *delta >> TS_SHIFT; |
| 1352 | } else { | 1418 | } else { |
| 1353 | cpu_buffer->commit_page->page->time_stamp = *ts; | 1419 | cpu_buffer->commit_page->page->time_stamp = *ts; |
| 1354 | event->time_delta = 0; | 1420 | /* try to discard, since we do not need this */ |
| 1355 | event->array[0] = 0; | 1421 | if (!rb_try_to_discard(cpu_buffer, event)) { |
| 1422 | /* nope, just zero it */ | ||
| 1423 | event->time_delta = 0; | ||
| 1424 | event->array[0] = 0; | ||
| 1425 | } | ||
| 1356 | } | 1426 | } |
| 1357 | cpu_buffer->write_stamp = *ts; | 1427 | cpu_buffer->write_stamp = *ts; |
| 1358 | /* let the caller know this was the commit */ | 1428 | /* let the caller know this was the commit */ |
| 1359 | ret = 1; | 1429 | ret = 1; |
| 1360 | } else { | 1430 | } else { |
| 1361 | /* Darn, this is just wasted space */ | 1431 | /* Try to discard the event */ |
| 1362 | event->time_delta = 0; | 1432 | if (!rb_try_to_discard(cpu_buffer, event)) { |
| 1363 | event->array[0] = 0; | 1433 | /* Darn, this is just wasted space */ |
| 1434 | event->time_delta = 0; | ||
| 1435 | event->array[0] = 0; | ||
| 1436 | } | ||
| 1364 | ret = 0; | 1437 | ret = 0; |
| 1365 | } | 1438 | } |
| 1366 | 1439 | ||
| @@ -1371,13 +1444,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1371 | 1444 | ||
| 1372 | static struct ring_buffer_event * | 1445 | static struct ring_buffer_event * |
| 1373 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | 1446 | rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, |
| 1374 | unsigned type, unsigned long length) | 1447 | unsigned long length) |
| 1375 | { | 1448 | { |
| 1376 | struct ring_buffer_event *event; | 1449 | struct ring_buffer_event *event; |
| 1377 | u64 ts, delta; | 1450 | u64 ts, delta = 0; |
| 1378 | int commit = 0; | 1451 | int commit = 0; |
| 1379 | int nr_loops = 0; | 1452 | int nr_loops = 0; |
| 1380 | 1453 | ||
| 1454 | length = rb_calculate_event_length(length); | ||
| 1381 | again: | 1455 | again: |
| 1382 | /* | 1456 | /* |
| 1383 | * We allow for interrupts to reenter here and do a trace. | 1457 | * We allow for interrupts to reenter here and do a trace. |
| @@ -1391,7 +1465,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1391 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1465 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
| 1392 | return NULL; | 1466 | return NULL; |
| 1393 | 1467 | ||
| 1394 | ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); | 1468 | ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
| 1395 | 1469 | ||
| 1396 | /* | 1470 | /* |
| 1397 | * Only the first commit can update the timestamp. | 1471 | * Only the first commit can update the timestamp. |
| @@ -1401,23 +1475,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1401 | * also be made. But only the entry that did the actual | 1475 | * also be made. But only the entry that did the actual |
| 1402 | * commit will be something other than zero. | 1476 | * commit will be something other than zero. |
| 1403 | */ | 1477 | */ |
| 1404 | if (cpu_buffer->tail_page == cpu_buffer->commit_page && | 1478 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && |
| 1405 | rb_page_write(cpu_buffer->tail_page) == | 1479 | rb_page_write(cpu_buffer->tail_page) == |
| 1406 | rb_commit_index(cpu_buffer)) { | 1480 | rb_commit_index(cpu_buffer))) { |
| 1481 | u64 diff; | ||
| 1407 | 1482 | ||
| 1408 | delta = ts - cpu_buffer->write_stamp; | 1483 | diff = ts - cpu_buffer->write_stamp; |
| 1409 | 1484 | ||
| 1410 | /* make sure this delta is calculated here */ | 1485 | /* make sure this diff is calculated here */ |
| 1411 | barrier(); | 1486 | barrier(); |
| 1412 | 1487 | ||
| 1413 | /* Did the write stamp get updated already? */ | 1488 | /* Did the write stamp get updated already? */ |
| 1414 | if (unlikely(ts < cpu_buffer->write_stamp)) | 1489 | if (unlikely(ts < cpu_buffer->write_stamp)) |
| 1415 | delta = 0; | 1490 | goto get_event; |
| 1416 | 1491 | ||
| 1417 | if (test_time_stamp(delta)) { | 1492 | delta = diff; |
| 1493 | if (unlikely(test_time_stamp(delta))) { | ||
| 1418 | 1494 | ||
| 1419 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 1495 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); |
| 1420 | |||
| 1421 | if (commit == -EBUSY) | 1496 | if (commit == -EBUSY) |
| 1422 | return NULL; | 1497 | return NULL; |
| 1423 | 1498 | ||
| @@ -1426,12 +1501,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1426 | 1501 | ||
| 1427 | RB_WARN_ON(cpu_buffer, commit < 0); | 1502 | RB_WARN_ON(cpu_buffer, commit < 0); |
| 1428 | } | 1503 | } |
| 1429 | } else | 1504 | } |
| 1430 | /* Non commits have zero deltas */ | ||
| 1431 | delta = 0; | ||
| 1432 | 1505 | ||
| 1433 | event = __rb_reserve_next(cpu_buffer, type, length, &ts); | 1506 | get_event: |
| 1434 | if (PTR_ERR(event) == -EAGAIN) | 1507 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); |
| 1508 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | ||
| 1435 | goto again; | 1509 | goto again; |
| 1436 | 1510 | ||
| 1437 | if (!event) { | 1511 | if (!event) { |
| @@ -1448,7 +1522,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1448 | * If the timestamp was committed, make the commit our entry | 1522 | * If the timestamp was committed, make the commit our entry |
| 1449 | * now so that we will update it when needed. | 1523 | * now so that we will update it when needed. |
| 1450 | */ | 1524 | */ |
| 1451 | if (commit) | 1525 | if (unlikely(commit)) |
| 1452 | rb_set_commit_event(cpu_buffer, event); | 1526 | rb_set_commit_event(cpu_buffer, event); |
| 1453 | else if (!rb_is_commit(cpu_buffer, event)) | 1527 | else if (!rb_is_commit(cpu_buffer, event)) |
| 1454 | delta = 0; | 1528 | delta = 0; |
| @@ -1458,6 +1532,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1458 | return event; | 1532 | return event; |
| 1459 | } | 1533 | } |
| 1460 | 1534 | ||
| 1535 | #define TRACE_RECURSIVE_DEPTH 16 | ||
| 1536 | |||
| 1537 | static int trace_recursive_lock(void) | ||
| 1538 | { | ||
| 1539 | current->trace_recursion++; | ||
| 1540 | |||
| 1541 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
| 1542 | return 0; | ||
| 1543 | |||
| 1544 | /* Disable all tracing before we do anything else */ | ||
| 1545 | tracing_off_permanent(); | ||
| 1546 | |||
| 1547 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | ||
| 1548 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", | ||
| 1549 | current->trace_recursion, | ||
| 1550 | hardirq_count() >> HARDIRQ_SHIFT, | ||
| 1551 | softirq_count() >> SOFTIRQ_SHIFT, | ||
| 1552 | in_nmi()); | ||
| 1553 | |||
| 1554 | WARN_ON_ONCE(1); | ||
| 1555 | return -1; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | static void trace_recursive_unlock(void) | ||
| 1559 | { | ||
| 1560 | WARN_ON_ONCE(!current->trace_recursion); | ||
| 1561 | |||
| 1562 | current->trace_recursion--; | ||
| 1563 | } | ||
| 1564 | |||
| 1461 | static DEFINE_PER_CPU(int, rb_need_resched); | 1565 | static DEFINE_PER_CPU(int, rb_need_resched); |
| 1462 | 1566 | ||
| 1463 | /** | 1567 | /** |
| @@ -1491,6 +1595,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
| 1491 | /* If we are tracing schedule, we don't want to recurse */ | 1595 | /* If we are tracing schedule, we don't want to recurse */ |
| 1492 | resched = ftrace_preempt_disable(); | 1596 | resched = ftrace_preempt_disable(); |
| 1493 | 1597 | ||
| 1598 | if (trace_recursive_lock()) | ||
| 1599 | goto out_nocheck; | ||
| 1600 | |||
| 1494 | cpu = raw_smp_processor_id(); | 1601 | cpu = raw_smp_processor_id(); |
| 1495 | 1602 | ||
| 1496 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 1603 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| @@ -1501,11 +1608,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
| 1501 | if (atomic_read(&cpu_buffer->record_disabled)) | 1608 | if (atomic_read(&cpu_buffer->record_disabled)) |
| 1502 | goto out; | 1609 | goto out; |
| 1503 | 1610 | ||
| 1504 | length = rb_calculate_event_length(length); | 1611 | if (length > BUF_MAX_DATA_SIZE) |
| 1505 | if (length > BUF_PAGE_SIZE) | ||
| 1506 | goto out; | 1612 | goto out; |
| 1507 | 1613 | ||
| 1508 | event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); | 1614 | event = rb_reserve_next_event(cpu_buffer, length); |
| 1509 | if (!event) | 1615 | if (!event) |
| 1510 | goto out; | 1616 | goto out; |
| 1511 | 1617 | ||
| @@ -1520,6 +1626,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) | |||
| 1520 | return event; | 1626 | return event; |
| 1521 | 1627 | ||
| 1522 | out: | 1628 | out: |
| 1629 | trace_recursive_unlock(); | ||
| 1630 | |||
| 1631 | out_nocheck: | ||
| 1523 | ftrace_preempt_enable(resched); | 1632 | ftrace_preempt_enable(resched); |
| 1524 | return NULL; | 1633 | return NULL; |
| 1525 | } | 1634 | } |
| @@ -1528,7 +1637,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); | |||
| 1528 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 1637 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
| 1529 | struct ring_buffer_event *event) | 1638 | struct ring_buffer_event *event) |
| 1530 | { | 1639 | { |
| 1531 | cpu_buffer->entries++; | 1640 | local_inc(&cpu_buffer->entries); |
| 1532 | 1641 | ||
| 1533 | /* Only process further if we own the commit */ | 1642 | /* Only process further if we own the commit */ |
| 1534 | if (!rb_is_commit(cpu_buffer, event)) | 1643 | if (!rb_is_commit(cpu_buffer, event)) |
| @@ -1558,6 +1667,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
| 1558 | 1667 | ||
| 1559 | rb_commit(cpu_buffer, event); | 1668 | rb_commit(cpu_buffer, event); |
| 1560 | 1669 | ||
| 1670 | trace_recursive_unlock(); | ||
| 1671 | |||
| 1561 | /* | 1672 | /* |
| 1562 | * Only the last preempt count needs to restore preemption. | 1673 | * Only the last preempt count needs to restore preemption. |
| 1563 | */ | 1674 | */ |
| @@ -1570,6 +1681,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
| 1570 | } | 1681 | } |
| 1571 | EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | 1682 | EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); |
| 1572 | 1683 | ||
| 1684 | static inline void rb_event_discard(struct ring_buffer_event *event) | ||
| 1685 | { | ||
| 1686 | /* array[0] holds the actual length for the discarded event */ | ||
| 1687 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | ||
| 1688 | event->type_len = RINGBUF_TYPE_PADDING; | ||
| 1689 | /* time delta must be non zero */ | ||
| 1690 | if (!event->time_delta) | ||
| 1691 | event->time_delta = 1; | ||
| 1692 | } | ||
| 1693 | |||
| 1694 | /** | ||
| 1695 | * ring_buffer_event_discard - discard any event in the ring buffer | ||
| 1696 | * @event: the event to discard | ||
| 1697 | * | ||
| 1698 | * Sometimes an event that is in the ring buffer needs to be ignored. | ||
| 1699 | * This function lets the user discard an event in the ring buffer | ||
| 1700 | * and then that event will not be read later. | ||
| 1701 | * | ||
| 1702 | * Note, it is up to the user to be careful with this, and protect | ||
| 1703 | * against races. If the user discards an event that has been consumed | ||
| 1704 | * it is possible that it could corrupt the ring buffer. | ||
| 1705 | */ | ||
| 1706 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
| 1707 | { | ||
| 1708 | rb_event_discard(event); | ||
| 1709 | } | ||
| 1710 | EXPORT_SYMBOL_GPL(ring_buffer_event_discard); | ||
| 1711 | |||
| 1712 | /** | ||
| 1713 | * ring_buffer_discard_commit - discard an event that has not been committed | ||
| 1714 | * @buffer: the ring buffer | ||
| 1715 | * @event: non committed event to discard | ||
| 1716 | * | ||
| 1717 | * This is similar to ring_buffer_event_discard but must only be | ||
| 1718 | * performed on an event that has not been committed yet. The difference | ||
| 1719 | * is that this will also try to free the event from the ring buffer | ||
| 1720 | * if another event has not been added behind it. | ||
| 1721 | * | ||
| 1722 | * If another event has been added behind it, it will set the event | ||
| 1723 | * up as discarded, and perform the commit. | ||
| 1724 | * | ||
| 1725 | * If this function is called, do not call ring_buffer_unlock_commit on | ||
| 1726 | * the event. | ||
| 1727 | */ | ||
| 1728 | void ring_buffer_discard_commit(struct ring_buffer *buffer, | ||
| 1729 | struct ring_buffer_event *event) | ||
| 1730 | { | ||
| 1731 | struct ring_buffer_per_cpu *cpu_buffer; | ||
| 1732 | int cpu; | ||
| 1733 | |||
| 1734 | /* The event is discarded regardless */ | ||
| 1735 | rb_event_discard(event); | ||
| 1736 | |||
| 1737 | /* | ||
| 1738 | * This must only be called if the event has not been | ||
| 1739 | * committed yet. Thus we can assume that preemption | ||
| 1740 | * is still disabled. | ||
| 1741 | */ | ||
| 1742 | RB_WARN_ON(buffer, preemptible()); | ||
| 1743 | |||
| 1744 | cpu = smp_processor_id(); | ||
| 1745 | cpu_buffer = buffer->buffers[cpu]; | ||
| 1746 | |||
| 1747 | if (!rb_try_to_discard(cpu_buffer, event)) | ||
| 1748 | goto out; | ||
| 1749 | |||
| 1750 | /* | ||
| 1751 | * The commit is still visible by the reader, so we | ||
| 1752 | * must increment entries. | ||
| 1753 | */ | ||
| 1754 | local_inc(&cpu_buffer->entries); | ||
| 1755 | out: | ||
| 1756 | /* | ||
| 1757 | * If a write came in and pushed the tail page | ||
| 1758 | * we still need to update the commit pointer | ||
| 1759 | * if we were the commit. | ||
| 1760 | */ | ||
| 1761 | if (rb_is_commit(cpu_buffer, event)) | ||
| 1762 | rb_set_commit_to_write(cpu_buffer); | ||
| 1763 | |||
| 1764 | trace_recursive_unlock(); | ||
| 1765 | |||
| 1766 | /* | ||
| 1767 | * Only the last preempt count needs to restore preemption. | ||
| 1768 | */ | ||
| 1769 | if (preempt_count() == 1) | ||
| 1770 | ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); | ||
| 1771 | else | ||
| 1772 | preempt_enable_no_resched_notrace(); | ||
| 1773 | |||
| 1774 | } | ||
| 1775 | EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); | ||
| 1776 | |||
| 1573 | /** | 1777 | /** |
| 1574 | * ring_buffer_write - write data to the buffer without reserving | 1778 | * ring_buffer_write - write data to the buffer without reserving |
| 1575 | * @buffer: The ring buffer to write to. | 1779 | * @buffer: The ring buffer to write to. |
| @@ -1589,7 +1793,6 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
| 1589 | { | 1793 | { |
| 1590 | struct ring_buffer_per_cpu *cpu_buffer; | 1794 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1591 | struct ring_buffer_event *event; | 1795 | struct ring_buffer_event *event; |
| 1592 | unsigned long event_length; | ||
| 1593 | void *body; | 1796 | void *body; |
| 1594 | int ret = -EBUSY; | 1797 | int ret = -EBUSY; |
| 1595 | int cpu, resched; | 1798 | int cpu, resched; |
| @@ -1612,9 +1815,10 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
| 1612 | if (atomic_read(&cpu_buffer->record_disabled)) | 1815 | if (atomic_read(&cpu_buffer->record_disabled)) |
| 1613 | goto out; | 1816 | goto out; |
| 1614 | 1817 | ||
| 1615 | event_length = rb_calculate_event_length(length); | 1818 | if (length > BUF_MAX_DATA_SIZE) |
| 1616 | event = rb_reserve_next_event(cpu_buffer, | 1819 | goto out; |
| 1617 | RINGBUF_TYPE_DATA, event_length); | 1820 | |
| 1821 | event = rb_reserve_next_event(cpu_buffer, length); | ||
| 1618 | if (!event) | 1822 | if (!event) |
| 1619 | goto out; | 1823 | goto out; |
| 1620 | 1824 | ||
| @@ -1728,7 +1932,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | |||
| 1728 | return 0; | 1932 | return 0; |
| 1729 | 1933 | ||
| 1730 | cpu_buffer = buffer->buffers[cpu]; | 1934 | cpu_buffer = buffer->buffers[cpu]; |
| 1731 | ret = cpu_buffer->entries; | 1935 | ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) |
| 1936 | - cpu_buffer->read; | ||
| 1732 | 1937 | ||
| 1733 | return ret; | 1938 | return ret; |
| 1734 | } | 1939 | } |
| @@ -1755,6 +1960,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | |||
| 1755 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 1960 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
| 1756 | 1961 | ||
| 1757 | /** | 1962 | /** |
| 1963 | * ring_buffer_nmi_dropped_cpu - get the number of NMIs that were dropped | ||
| 1964 | * @buffer: The ring buffer | ||
| 1965 | * @cpu: The per CPU buffer to get the number of dropped NMIs from | ||
| 1966 | */ | ||
| 1967 | unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) | ||
| 1968 | { | ||
| 1969 | struct ring_buffer_per_cpu *cpu_buffer; | ||
| 1970 | unsigned long ret; | ||
| 1971 | |||
| 1972 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
| 1973 | return 0; | ||
| 1974 | |||
| 1975 | cpu_buffer = buffer->buffers[cpu]; | ||
| 1976 | ret = cpu_buffer->nmi_dropped; | ||
| 1977 | |||
| 1978 | return ret; | ||
| 1979 | } | ||
| 1980 | EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); | ||
| 1981 | |||
| 1982 | /** | ||
| 1983 | * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits | ||
| 1984 | * @buffer: The ring buffer | ||
| 1985 | * @cpu: The per CPU buffer to get the number of overruns from | ||
| 1986 | */ | ||
| 1987 | unsigned long | ||
| 1988 | ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) | ||
| 1989 | { | ||
| 1990 | struct ring_buffer_per_cpu *cpu_buffer; | ||
| 1991 | unsigned long ret; | ||
| 1992 | |||
| 1993 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
| 1994 | return 0; | ||
| 1995 | |||
| 1996 | cpu_buffer = buffer->buffers[cpu]; | ||
| 1997 | ret = cpu_buffer->commit_overrun; | ||
| 1998 | |||
| 1999 | return ret; | ||
| 2000 | } | ||
| 2001 | EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); | ||
| 2002 | |||
| 2003 | /** | ||
| 1758 | * ring_buffer_entries - get the number of entries in a buffer | 2004 | * ring_buffer_entries - get the number of entries in a buffer |
| 1759 | * @buffer: The ring buffer | 2005 | * @buffer: The ring buffer |
| 1760 | * | 2006 | * |
| @@ -1770,7 +2016,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
| 1770 | /* if you care about this being correct, lock the buffer */ | 2016 | /* if you care about this being correct, lock the buffer */ |
| 1771 | for_each_buffer_cpu(buffer, cpu) { | 2017 | for_each_buffer_cpu(buffer, cpu) { |
| 1772 | cpu_buffer = buffer->buffers[cpu]; | 2018 | cpu_buffer = buffer->buffers[cpu]; |
| 1773 | entries += cpu_buffer->entries; | 2019 | entries += (local_read(&cpu_buffer->entries) - |
| 2020 | cpu_buffer->overrun) - cpu_buffer->read; | ||
| 1774 | } | 2021 | } |
| 1775 | 2022 | ||
| 1776 | return entries; | 2023 | return entries; |
| @@ -1862,7 +2109,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1862 | { | 2109 | { |
| 1863 | u64 delta; | 2110 | u64 delta; |
| 1864 | 2111 | ||
| 1865 | switch (event->type) { | 2112 | switch (event->type_len) { |
| 1866 | case RINGBUF_TYPE_PADDING: | 2113 | case RINGBUF_TYPE_PADDING: |
| 1867 | return; | 2114 | return; |
| 1868 | 2115 | ||
| @@ -1893,7 +2140,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, | |||
| 1893 | { | 2140 | { |
| 1894 | u64 delta; | 2141 | u64 delta; |
| 1895 | 2142 | ||
| 1896 | switch (event->type) { | 2143 | switch (event->type_len) { |
| 1897 | case RINGBUF_TYPE_PADDING: | 2144 | case RINGBUF_TYPE_PADDING: |
| 1898 | return; | 2145 | return; |
| 1899 | 2146 | ||
| @@ -1966,6 +2213,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1966 | cpu_buffer->reader_page->list.prev = reader->list.prev; | 2213 | cpu_buffer->reader_page->list.prev = reader->list.prev; |
| 1967 | 2214 | ||
| 1968 | local_set(&cpu_buffer->reader_page->write, 0); | 2215 | local_set(&cpu_buffer->reader_page->write, 0); |
| 2216 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
| 1969 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2217 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
| 1970 | 2218 | ||
| 1971 | /* Make the reader page now replace the head */ | 2219 | /* Make the reader page now replace the head */ |
| @@ -2008,8 +2256,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2008 | 2256 | ||
| 2009 | event = rb_reader_event(cpu_buffer); | 2257 | event = rb_reader_event(cpu_buffer); |
| 2010 | 2258 | ||
| 2011 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) | 2259 | if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX |
| 2012 | cpu_buffer->entries--; | 2260 | || rb_discarded_event(event)) |
| 2261 | cpu_buffer->read++; | ||
| 2013 | 2262 | ||
| 2014 | rb_update_read_stamp(cpu_buffer, event); | 2263 | rb_update_read_stamp(cpu_buffer, event); |
| 2015 | 2264 | ||
| @@ -2031,8 +2280,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
| 2031 | * Check if we are at the end of the buffer. | 2280 | * Check if we are at the end of the buffer. |
| 2032 | */ | 2281 | */ |
| 2033 | if (iter->head >= rb_page_size(iter->head_page)) { | 2282 | if (iter->head >= rb_page_size(iter->head_page)) { |
| 2034 | if (RB_WARN_ON(buffer, | 2283 | /* discarded commits can make the page empty */ |
| 2035 | iter->head_page == cpu_buffer->commit_page)) | 2284 | if (iter->head_page == cpu_buffer->commit_page) |
| 2036 | return; | 2285 | return; |
| 2037 | rb_inc_iter(iter); | 2286 | rb_inc_iter(iter); |
| 2038 | return; | 2287 | return; |
| @@ -2075,12 +2324,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2075 | /* | 2324 | /* |
| 2076 | * We repeat when a timestamp is encountered. It is possible | 2325 | * We repeat when a timestamp is encountered. It is possible |
| 2077 | * to get multiple timestamps from an interrupt entering just | 2326 | * to get multiple timestamps from an interrupt entering just |
| 2078 | * as one timestamp is about to be written. The max times | 2327 | * as one timestamp is about to be written, or from discarded |
| 2079 | * that this can happen is the number of nested interrupts we | 2328 | * commits. The most that we can have is the number on a single page. |
| 2080 | * can have. Nesting 10 deep of interrupts is clearly | ||
| 2081 | * an anomaly. | ||
| 2082 | */ | 2329 | */ |
| 2083 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2330 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
| 2084 | return NULL; | 2331 | return NULL; |
| 2085 | 2332 | ||
| 2086 | reader = rb_get_reader_page(cpu_buffer); | 2333 | reader = rb_get_reader_page(cpu_buffer); |
| @@ -2089,7 +2336,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2089 | 2336 | ||
| 2090 | event = rb_reader_event(cpu_buffer); | 2337 | event = rb_reader_event(cpu_buffer); |
| 2091 | 2338 | ||
| 2092 | switch (event->type) { | 2339 | switch (event->type_len) { |
| 2093 | case RINGBUF_TYPE_PADDING: | 2340 | case RINGBUF_TYPE_PADDING: |
| 2094 | if (rb_null_event(event)) | 2341 | if (rb_null_event(event)) |
| 2095 | RB_WARN_ON(cpu_buffer, 1); | 2342 | RB_WARN_ON(cpu_buffer, 1); |
| @@ -2146,14 +2393,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2146 | 2393 | ||
| 2147 | again: | 2394 | again: |
| 2148 | /* | 2395 | /* |
| 2149 | * We repeat when a timestamp is encountered. It is possible | 2396 | * We repeat when a timestamp is encountered. |
| 2150 | * to get multiple timestamps from an interrupt entering just | 2397 | * We can get multiple timestamps by nested interrupts or also |
| 2151 | * as one timestamp is about to be written. The max times | 2398 | * if filtering is on (discarding commits). Since discarding |
| 2152 | * that this can happen is the number of nested interrupts we | 2399 | * commits can be frequent we can get a lot of timestamps. |
| 2153 | * can have. Nesting 10 deep of interrupts is clearly | 2400 | * But we limit them by not adding timestamps if they begin |
| 2154 | * an anomaly. | 2401 | * at the start of a page. |
| 2155 | */ | 2402 | */ |
| 2156 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10)) | 2403 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) |
| 2157 | return NULL; | 2404 | return NULL; |
| 2158 | 2405 | ||
| 2159 | if (rb_per_cpu_empty(cpu_buffer)) | 2406 | if (rb_per_cpu_empty(cpu_buffer)) |
| @@ -2161,7 +2408,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2161 | 2408 | ||
| 2162 | event = rb_iter_head_event(iter); | 2409 | event = rb_iter_head_event(iter); |
| 2163 | 2410 | ||
| 2164 | switch (event->type) { | 2411 | switch (event->type_len) { |
| 2165 | case RINGBUF_TYPE_PADDING: | 2412 | case RINGBUF_TYPE_PADDING: |
| 2166 | if (rb_null_event(event)) { | 2413 | if (rb_null_event(event)) { |
| 2167 | rb_inc_iter(iter); | 2414 | rb_inc_iter(iter); |
| @@ -2220,7 +2467,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2220 | event = rb_buffer_peek(buffer, cpu, ts); | 2467 | event = rb_buffer_peek(buffer, cpu, ts); |
| 2221 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2468 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2222 | 2469 | ||
| 2223 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2470 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
| 2224 | cpu_relax(); | 2471 | cpu_relax(); |
| 2225 | goto again; | 2472 | goto again; |
| 2226 | } | 2473 | } |
| @@ -2248,7 +2495,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2248 | event = rb_iter_peek(iter, ts); | 2495 | event = rb_iter_peek(iter, ts); |
| 2249 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2496 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2250 | 2497 | ||
| 2251 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2498 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
| 2252 | cpu_relax(); | 2499 | cpu_relax(); |
| 2253 | goto again; | 2500 | goto again; |
| 2254 | } | 2501 | } |
| @@ -2293,7 +2540,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 2293 | out: | 2540 | out: |
| 2294 | preempt_enable(); | 2541 | preempt_enable(); |
| 2295 | 2542 | ||
| 2296 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2543 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
| 2297 | cpu_relax(); | 2544 | cpu_relax(); |
| 2298 | goto again; | 2545 | goto again; |
| 2299 | } | 2546 | } |
| @@ -2386,7 +2633,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2386 | out: | 2633 | out: |
| 2387 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2634 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2388 | 2635 | ||
| 2389 | if (event && event->type == RINGBUF_TYPE_PADDING) { | 2636 | if (event && event->type_len == RINGBUF_TYPE_PADDING) { |
| 2390 | cpu_relax(); | 2637 | cpu_relax(); |
| 2391 | goto again; | 2638 | goto again; |
| 2392 | } | 2639 | } |
| @@ -2411,6 +2658,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2411 | cpu_buffer->head_page | 2658 | cpu_buffer->head_page |
| 2412 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); | 2659 | = list_entry(cpu_buffer->pages.next, struct buffer_page, list); |
| 2413 | local_set(&cpu_buffer->head_page->write, 0); | 2660 | local_set(&cpu_buffer->head_page->write, 0); |
| 2661 | local_set(&cpu_buffer->head_page->entries, 0); | ||
| 2414 | local_set(&cpu_buffer->head_page->page->commit, 0); | 2662 | local_set(&cpu_buffer->head_page->page->commit, 0); |
| 2415 | 2663 | ||
| 2416 | cpu_buffer->head_page->read = 0; | 2664 | cpu_buffer->head_page->read = 0; |
| @@ -2420,11 +2668,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 2420 | 2668 | ||
| 2421 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 2669 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
| 2422 | local_set(&cpu_buffer->reader_page->write, 0); | 2670 | local_set(&cpu_buffer->reader_page->write, 0); |
| 2671 | local_set(&cpu_buffer->reader_page->entries, 0); | ||
| 2423 | local_set(&cpu_buffer->reader_page->page->commit, 0); | 2672 | local_set(&cpu_buffer->reader_page->page->commit, 0); |
| 2424 | cpu_buffer->reader_page->read = 0; | 2673 | cpu_buffer->reader_page->read = 0; |
| 2425 | 2674 | ||
| 2675 | cpu_buffer->nmi_dropped = 0; | ||
| 2676 | cpu_buffer->commit_overrun = 0; | ||
| 2426 | cpu_buffer->overrun = 0; | 2677 | cpu_buffer->overrun = 0; |
| 2427 | cpu_buffer->entries = 0; | 2678 | cpu_buffer->read = 0; |
| 2679 | local_set(&cpu_buffer->entries, 0); | ||
| 2428 | 2680 | ||
| 2429 | cpu_buffer->write_stamp = 0; | 2681 | cpu_buffer->write_stamp = 0; |
| 2430 | cpu_buffer->read_stamp = 0; | 2682 | cpu_buffer->read_stamp = 0; |
| @@ -2443,6 +2695,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
| 2443 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2695 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 2444 | return; | 2696 | return; |
| 2445 | 2697 | ||
| 2698 | atomic_inc(&cpu_buffer->record_disabled); | ||
| 2699 | |||
| 2446 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2700 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2447 | 2701 | ||
| 2448 | __raw_spin_lock(&cpu_buffer->lock); | 2702 | __raw_spin_lock(&cpu_buffer->lock); |
| @@ -2452,6 +2706,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
| 2452 | __raw_spin_unlock(&cpu_buffer->lock); | 2706 | __raw_spin_unlock(&cpu_buffer->lock); |
| 2453 | 2707 | ||
| 2454 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2708 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2709 | |||
| 2710 | atomic_dec(&cpu_buffer->record_disabled); | ||
| 2455 | } | 2711 | } |
| 2456 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); | 2712 | EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); |
| 2457 | 2713 | ||
| @@ -2578,28 +2834,6 @@ out: | |||
| 2578 | } | 2834 | } |
| 2579 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 2835 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
| 2580 | 2836 | ||
| 2581 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | ||
| 2582 | struct buffer_data_page *bpage, | ||
| 2583 | unsigned int offset) | ||
| 2584 | { | ||
| 2585 | struct ring_buffer_event *event; | ||
| 2586 | unsigned long head; | ||
| 2587 | |||
| 2588 | __raw_spin_lock(&cpu_buffer->lock); | ||
| 2589 | for (head = offset; head < local_read(&bpage->commit); | ||
| 2590 | head += rb_event_length(event)) { | ||
| 2591 | |||
| 2592 | event = __rb_data_page_index(bpage, head); | ||
| 2593 | if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) | ||
| 2594 | return; | ||
| 2595 | /* Only count data entries */ | ||
| 2596 | if (event->type != RINGBUF_TYPE_DATA) | ||
| 2597 | continue; | ||
| 2598 | cpu_buffer->entries--; | ||
| 2599 | } | ||
| 2600 | __raw_spin_unlock(&cpu_buffer->lock); | ||
| 2601 | } | ||
| 2602 | |||
| 2603 | /** | 2837 | /** |
| 2604 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 2838 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
| 2605 | * @buffer: the buffer to allocate for. | 2839 | * @buffer: the buffer to allocate for. |
| @@ -2630,6 +2864,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | |||
| 2630 | 2864 | ||
| 2631 | return bpage; | 2865 | return bpage; |
| 2632 | } | 2866 | } |
| 2867 | EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); | ||
| 2633 | 2868 | ||
| 2634 | /** | 2869 | /** |
| 2635 | * ring_buffer_free_read_page - free an allocated read page | 2870 | * ring_buffer_free_read_page - free an allocated read page |
| @@ -2642,6 +2877,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
| 2642 | { | 2877 | { |
| 2643 | free_page((unsigned long)data); | 2878 | free_page((unsigned long)data); |
| 2644 | } | 2879 | } |
| 2880 | EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | ||
| 2645 | 2881 | ||
| 2646 | /** | 2882 | /** |
| 2647 | * ring_buffer_read_page - extract a page from the ring buffer | 2883 | * ring_buffer_read_page - extract a page from the ring buffer |
| @@ -2768,16 +3004,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 2768 | /* we copied everything to the beginning */ | 3004 | /* we copied everything to the beginning */ |
| 2769 | read = 0; | 3005 | read = 0; |
| 2770 | } else { | 3006 | } else { |
| 3007 | /* update the entry counter */ | ||
| 3008 | cpu_buffer->read += local_read(&reader->entries); | ||
| 3009 | |||
| 2771 | /* swap the pages */ | 3010 | /* swap the pages */ |
| 2772 | rb_init_page(bpage); | 3011 | rb_init_page(bpage); |
| 2773 | bpage = reader->page; | 3012 | bpage = reader->page; |
| 2774 | reader->page = *data_page; | 3013 | reader->page = *data_page; |
| 2775 | local_set(&reader->write, 0); | 3014 | local_set(&reader->write, 0); |
| 3015 | local_set(&reader->entries, 0); | ||
| 2776 | reader->read = 0; | 3016 | reader->read = 0; |
| 2777 | *data_page = bpage; | 3017 | *data_page = bpage; |
| 2778 | |||
| 2779 | /* update the entry counter */ | ||
| 2780 | rb_remove_entries(cpu_buffer, bpage, read); | ||
| 2781 | } | 3018 | } |
| 2782 | ret = read; | 3019 | ret = read; |
| 2783 | 3020 | ||
| @@ -2787,6 +3024,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 2787 | out: | 3024 | out: |
| 2788 | return ret; | 3025 | return ret; |
| 2789 | } | 3026 | } |
| 3027 | EXPORT_SYMBOL_GPL(ring_buffer_read_page); | ||
| 2790 | 3028 | ||
| 2791 | static ssize_t | 3029 | static ssize_t |
| 2792 | rb_simple_read(struct file *filp, char __user *ubuf, | 3030 | rb_simple_read(struct file *filp, char __user *ubuf, |
| @@ -2845,14 +3083,11 @@ static const struct file_operations rb_simple_fops = { | |||
| 2845 | static __init int rb_init_debugfs(void) | 3083 | static __init int rb_init_debugfs(void) |
| 2846 | { | 3084 | { |
| 2847 | struct dentry *d_tracer; | 3085 | struct dentry *d_tracer; |
| 2848 | struct dentry *entry; | ||
| 2849 | 3086 | ||
| 2850 | d_tracer = tracing_init_dentry(); | 3087 | d_tracer = tracing_init_dentry(); |
| 2851 | 3088 | ||
| 2852 | entry = debugfs_create_file("tracing_on", 0644, d_tracer, | 3089 | trace_create_file("tracing_on", 0644, d_tracer, |
| 2853 | &ring_buffer_flags, &rb_simple_fops); | 3090 | &ring_buffer_flags, &rb_simple_fops); |
| 2854 | if (!entry) | ||
| 2855 | pr_warning("Could not create debugfs 'tracing_on' entry\n"); | ||
| 2856 | 3091 | ||
| 2857 | return 0; | 3092 | return 0; |
| 2858 | } | 3093 | } |
