Diffstat (limited to 'kernel/trace/ring_buffer.c')
| -rw-r--r-- | kernel/trace/ring_buffer.c | 693 |
1 file changed, 529 insertions, 164 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..960cbf44c844 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
| @@ -4,21 +4,92 @@ | |||
| 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
| 5 | */ | 5 | */ |
| 6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
| 7 | #include <linux/trace_clock.h> | ||
| 8 | #include <linux/ftrace_irq.h> | ||
| 7 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
| 8 | #include <linux/debugfs.h> | 10 | #include <linux/debugfs.h> |
| 9 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
| 12 | #include <linux/hardirq.h> | ||
| 10 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 11 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
| 12 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
| 13 | #include <linux/sched.h> /* used for sched_clock() (for now) */ | ||
| 14 | #include <linux/init.h> | 16 | #include <linux/init.h> |
| 15 | #include <linux/hash.h> | 17 | #include <linux/hash.h> |
| 16 | #include <linux/list.h> | 18 | #include <linux/list.h> |
| 19 | #include <linux/cpu.h> | ||
| 17 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
| 18 | 21 | ||
| 19 | #include "trace.h" | 22 | #include "trace.h" |
| 20 | 23 | ||
| 21 | /* | 24 | /* |
| 25 | * The ring buffer is made up of a list of pages. A separate list of pages is | ||
| 26 | * allocated for each CPU. A writer may only write to a buffer that is | ||
| 27 | * associated with the CPU it is currently executing on. A reader may read | ||
| 28 | * from any per cpu buffer. | ||
| 29 | * | ||
| 30 | * The reader is special. For each per cpu buffer, the reader has its own | ||
| 31 | * reader page. When a reader has read the entire reader page, this reader | ||
| 32 | * page is swapped with another page in the ring buffer. | ||
| 33 | * | ||
| 34 | * Now, as long as the writer is off the reader page, the reader can do what | ||
| 35 | * ever it wants with that page. The writer will never write to that page | ||
| 36 | * again (as long as it is out of the ring buffer). | ||
| 37 | * | ||
| 38 | * Here's some silly ASCII art. | ||
| 39 | * | ||
| 40 | * +------+ | ||
| 41 | * |reader| RING BUFFER | ||
| 42 | * |page | | ||
| 43 | * +------+ +---+ +---+ +---+ | ||
| 44 | * | |-->| |-->| | | ||
| 45 | * +---+ +---+ +---+ | ||
| 46 | * ^ | | ||
| 47 | * | | | ||
| 48 | * +---------------+ | ||
| 49 | * | ||
| 50 | * | ||
| 51 | * +------+ | ||
| 52 | * |reader| RING BUFFER | ||
| 53 | * |page |------------------v | ||
| 54 | * +------+ +---+ +---+ +---+ | ||
| 55 | * | |-->| |-->| | | ||
| 56 | * +---+ +---+ +---+ | ||
| 57 | * ^ | | ||
| 58 | * | | | ||
| 59 | * +---------------+ | ||
| 60 | * | ||
| 61 | * | ||
| 62 | * +------+ | ||
| 63 | * |reader| RING BUFFER | ||
| 64 | * |page |------------------v | ||
| 65 | * +------+ +---+ +---+ +---+ | ||
| 66 | * ^ | |-->| |-->| | | ||
| 67 | * | +---+ +---+ +---+ | ||
| 68 | * | | | ||
| 69 | * | | | ||
| 70 | * +------------------------------+ | ||
| 71 | * | ||
| 72 | * | ||
| 73 | * +------+ | ||
| 74 | * |buffer| RING BUFFER | ||
| 75 | * |page |------------------v | ||
| 76 | * +------+ +---+ +---+ +---+ | ||
| 77 | * ^ | | | |-->| | | ||
| 78 | * | New +---+ +---+ +---+ | ||
| 79 | * | Reader------^ | | ||
| 80 | * | page | | ||
| 81 | * +------------------------------+ | ||
| 82 | * | ||
| 83 | * | ||
| 84 | * After we make this swap, the reader can hand this page off to the splice | ||
| 85 | * code and be done with it. It can even allocate a new page if it needs to | ||
| 86 | * and swap that into the ring buffer. | ||
| 87 | * | ||
| 88 | * We will be using cmpxchg soon to make all this lockless. | ||
| 89 | * | ||
| 90 | */ | ||
| 91 | |||
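To make the swap described above concrete, here is a simplified sketch. This is illustrative only, not code this patch adds; the real logic lives in rb_get_reader_page() and must also cope with the writer racing onto the head page:

	/* Sketch: trade the empty reader page for the current head page. */
	static void reader_swap_sketch(struct ring_buffer_per_cpu *cpu_buffer)
	{
		struct buffer_page *head = cpu_buffer->head_page;

		/* Splice the reader page into the ring where head sits. */
		cpu_buffer->reader_page->list.next = head->list.next;
		cpu_buffer->reader_page->list.prev = head->list.prev;
		head->list.prev->next = &cpu_buffer->reader_page->list;
		head->list.next->prev = &cpu_buffer->reader_page->list;

		/* The old head page now belongs to the reader alone. */
		cpu_buffer->reader_page = head;
		rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
	}

Once the writer has moved past a swapped-out page, nothing in the ring references it again, which is what makes handing it to splice safe.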
| 92 | /* | ||
| 22 | * A fast way to enable or disable all ring buffers is to | 93 | * A fast way to enable or disable all ring buffers is to |
| 23 | * call tracing_on or tracing_off. Turning off the ring buffers | 94 | * call tracing_on or tracing_off. Turning off the ring buffers |
| 24 | * prevents all ring buffers from being recorded to. | 95 | * prevents all ring buffers from being recorded to. |
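These switches act on every ring buffer at once, which makes them handy around code that must not be traced. A hedged usage sketch (hypothetical call site, not part of this patch):

	if (tracing_is_on()) {
		tracing_off();		/* stop all ring-buffer writes */
		do_untraced_work();	/* hypothetical helper */
		tracing_on();		/* resume recording */
	}

tracing_off_permanent(), by contrast, sets a separate permanent-disable bit that tracing_on() never clears.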
| @@ -57,7 +128,9 @@ enum { | |||
| 57 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, | 128 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, |
| 58 | }; | 129 | }; |
| 59 | 130 | ||
| 60 | static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; | 131 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; |
| 132 | |||
| 133 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) | ||
| 61 | 134 | ||
| 62 | /** | 135 | /** |
| 63 | * tracing_on - enable all tracing buffers | 136 | * tracing_on - enable all tracing buffers |
| @@ -89,59 +162,92 @@ EXPORT_SYMBOL_GPL(tracing_off); | |||
| 89 | * tracing_off_permanent - permanently disable ring buffers | 162 | * tracing_off_permanent - permanently disable ring buffers |
| 90 | * | 163 | * |
| 91 | * This function, once called, will disable all ring buffers | 164 | * This function, once called, will disable all ring buffers |
| 92 | * permanenty. | 165 | * permanently. |
| 93 | */ | 166 | */ |
| 94 | void tracing_off_permanent(void) | 167 | void tracing_off_permanent(void) |
| 95 | { | 168 | { |
| 96 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); | 169 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); |
| 97 | } | 170 | } |
| 98 | 171 | ||
| 172 | /** | ||
| 173 | * tracing_is_on - show state of ring buffers enabled | ||
| 174 | */ | ||
| 175 | int tracing_is_on(void) | ||
| 176 | { | ||
| 177 | return ring_buffer_flags == RB_BUFFERS_ON; | ||
| 178 | } | ||
| 179 | EXPORT_SYMBOL_GPL(tracing_is_on); | ||
| 180 | |||
| 99 | #include "trace.h" | 181 | #include "trace.h" |
| 100 | 182 | ||
| 101 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 183 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
| 102 | #define DEBUG_SHIFT 0 | 184 | #define RB_ALIGNMENT 4U |
| 185 | #define RB_MAX_SMALL_DATA 28 | ||
| 186 | |||
| 187 | enum { | ||
| 188 | RB_LEN_TIME_EXTEND = 8, | ||
| 189 | RB_LEN_TIME_STAMP = 16, | ||
| 190 | }; | ||
| 103 | 191 | ||
| 104 | /* FIXME!!! */ | 192 | static inline int rb_null_event(struct ring_buffer_event *event) |
| 105 | u64 ring_buffer_time_stamp(int cpu) | ||
| 106 | { | 193 | { |
| 107 | u64 time; | 194 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; |
| 195 | } | ||
| 108 | 196 | ||
| 109 | preempt_disable_notrace(); | 197 | static inline int rb_discarded_event(struct ring_buffer_event *event) |
| 110 | /* shift to debug/test normalization and TIME_EXTENTS */ | 198 | { |
| 111 | time = sched_clock() << DEBUG_SHIFT; | 199 | return event->type == RINGBUF_TYPE_PADDING && event->time_delta; |
| 112 | preempt_enable_no_resched_notrace(); | 200 | } |
| 113 | 201 | ||
| 114 | return time; | 202 | static void rb_event_set_padding(struct ring_buffer_event *event) |
| 203 | { | ||
| 204 | event->type = RINGBUF_TYPE_PADDING; | ||
| 205 | event->time_delta = 0; | ||
| 115 | } | 206 | } |
| 116 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); | ||
| 117 | 207 | ||
| 118 | void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) | 208 | /** |
| 209 | * ring_buffer_event_discard - discard an event in the ring buffer | ||
| 210 | * @buffer: the ring buffer | ||
| 211 | * @event: the event to discard | ||
| 212 | * | ||
| 213 | * Sometimes an event that is in the ring buffer needs to be ignored. | ||
| 214 | * This function lets the user discard an event in the ring buffer | ||
| 215 | * and then that event will not be read later. | ||
| 216 | * | ||
| 217 | * Note, it is up to the user to be careful with this, and protect | ||
| 218 | * against races. If the user discards an event that has been consumed | ||
| 219 | * it is possible that it could corrupt the ring buffer. | ||
| 220 | */ | ||
| 221 | void ring_buffer_event_discard(struct ring_buffer_event *event) | ||
| 119 | { | 222 | { |
| 120 | /* Just stupid testing the normalize function and deltas */ | 223 | event->type = RINGBUF_TYPE_PADDING; |
| 121 | *ts >>= DEBUG_SHIFT; | 224 | /* time delta must be non zero */ |
| 225 | if (!event->time_delta) | ||
| 226 | event->time_delta = 1; | ||
| 122 | } | 227 | } |
| 123 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | ||
| 124 | 228 | ||
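A sketch of the intended reserve/discard flow (hypothetical caller; struct my_entry and the helpers are assumptions, and the commit still happens so the reserve/commit pairing stays balanced):

	struct ring_buffer_event *event;
	struct my_entry *entry;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	fill_entry(entry);				/* hypothetical */
	if (entry_matches_filter(entry))		/* hypothetical */
		ring_buffer_event_discard(event);	/* readers skip it as padding */
	ring_buffer_unlock_commit(buffer, event);

The nonzero time_delta forced by the discard is what later lets readers tell a discarded event (skip, but account for it) from end-of-page padding.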
| 125 | #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) | 229 | static unsigned |
| 126 | #define RB_ALIGNMENT_SHIFT 2 | 230 | rb_event_data_length(struct ring_buffer_event *event) |
| 127 | #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) | 231 | { |
| 128 | #define RB_MAX_SMALL_DATA 28 | 232 | unsigned length; |
| 129 | 233 | ||
| 130 | enum { | 234 | if (event->len) |
| 131 | RB_LEN_TIME_EXTEND = 8, | 235 | length = event->len * RB_ALIGNMENT; |
| 132 | RB_LEN_TIME_STAMP = 16, | 236 | else |
| 133 | }; | 237 | length = event->array[0]; |
| 238 | return length + RB_EVNT_HDR_SIZE; | ||
| 239 | } | ||
| 134 | 240 | ||
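A worked example of the encoding (assuming the 4-byte event header implied by RB_EVNT_HDR_SIZE here):

	/*
	 * 13-byte payload: event->len = DIV_ROUND_UP(13, 4) = 4, so
	 * rb_event_data_length() = 4 * RB_ALIGNMENT + RB_EVNT_HDR_SIZE
	 *                        = 16 + 4 = 20 bytes consumed in the page.
	 *
	 * 100-byte payload: 100 > RB_MAX_SMALL_DATA (28), so event->len = 0
	 * and the byte count is carried in event->array[0] instead, costing
	 * an extra 4 bytes of header.
	 */

This is why RB_MAX_SMALL_DATA is 28: it is the largest length the 3-bit len field can express in 4-byte units (7 * 4).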
| 135 | /* inline for ring buffer fast paths */ | 241 | /* inline for ring buffer fast paths */ |
| 136 | static inline unsigned | 242 | static unsigned |
| 137 | rb_event_length(struct ring_buffer_event *event) | 243 | rb_event_length(struct ring_buffer_event *event) |
| 138 | { | 244 | { |
| 139 | unsigned length; | ||
| 140 | |||
| 141 | switch (event->type) { | 245 | switch (event->type) { |
| 142 | case RINGBUF_TYPE_PADDING: | 246 | case RINGBUF_TYPE_PADDING: |
| 143 | /* undefined */ | 247 | if (rb_null_event(event)) |
| 144 | return -1; | 248 | /* undefined */ |
| 249 | return -1; | ||
| 250 | return rb_event_data_length(event); | ||
| 145 | 251 | ||
| 146 | case RINGBUF_TYPE_TIME_EXTEND: | 252 | case RINGBUF_TYPE_TIME_EXTEND: |
| 147 | return RB_LEN_TIME_EXTEND; | 253 | return RB_LEN_TIME_EXTEND; |
| @@ -150,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
| 150 | return RB_LEN_TIME_STAMP; | 256 | return RB_LEN_TIME_STAMP; |
| 151 | 257 | ||
| 152 | case RINGBUF_TYPE_DATA: | 258 | case RINGBUF_TYPE_DATA: |
| 153 | if (event->len) | 259 | return rb_event_data_length(event); |
| 154 | length = event->len << RB_ALIGNMENT_SHIFT; | ||
| 155 | else | ||
| 156 | length = event->array[0]; | ||
| 157 | return length + RB_EVNT_HDR_SIZE; | ||
| 158 | default: | 260 | default: |
| 159 | BUG(); | 261 | BUG(); |
| 160 | } | 262 | } |
| @@ -179,7 +281,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) | |||
| 179 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 281 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
| 180 | 282 | ||
| 181 | /* inline for ring buffer fast paths */ | 283 | /* inline for ring buffer fast paths */ |
| 182 | static inline void * | 284 | static void * |
| 183 | rb_event_data(struct ring_buffer_event *event) | 285 | rb_event_data(struct ring_buffer_event *event) |
| 184 | { | 286 | { |
| 185 | BUG_ON(event->type != RINGBUF_TYPE_DATA); | 287 | BUG_ON(event->type != RINGBUF_TYPE_DATA); |
| @@ -209,7 +311,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
| 209 | 311 | ||
| 210 | struct buffer_data_page { | 312 | struct buffer_data_page { |
| 211 | u64 time_stamp; /* page time stamp */ | 313 | u64 time_stamp; /* page time stamp */ |
| 212 | local_t commit; /* write commited index */ | 314 | local_t commit; /* write committed index */ |
| 213 | unsigned char data[]; /* data of buffer page */ | 315 | unsigned char data[]; /* data of buffer page */ |
| 214 | }; | 316 | }; |
| 215 | 317 | ||
| @@ -225,14 +327,25 @@ static void rb_init_page(struct buffer_data_page *bpage) | |||
| 225 | local_set(&bpage->commit, 0); | 327 | local_set(&bpage->commit, 0); |
| 226 | } | 328 | } |
| 227 | 329 | ||
| 330 | /** | ||
| 331 | * ring_buffer_page_len - the size of data on the page. | ||
| 332 | * @page: The page to read | ||
| 333 | * | ||
| 334 | * Returns the amount of data on the page, including buffer page header. | ||
| 335 | */ | ||
| 336 | size_t ring_buffer_page_len(void *page) | ||
| 337 | { | ||
| 338 | return local_read(&((struct buffer_data_page *)page)->commit) | ||
| 339 | + BUF_PAGE_HDR_SIZE; | ||
| 340 | } | ||
| 341 | |||
| 228 | /* | 342 | /* |
| 229 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing | 343 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing |
| 230 | * this issue out. | 344 | * this issue out. |
| 231 | */ | 345 | */ |
| 232 | static inline void free_buffer_page(struct buffer_page *bpage) | 346 | static void free_buffer_page(struct buffer_page *bpage) |
| 233 | { | 347 | { |
| 234 | if (bpage->page) | 348 | free_page((unsigned long)bpage->page); |
| 235 | free_page((unsigned long)bpage->page); | ||
| 236 | kfree(bpage); | 349 | kfree(bpage); |
| 237 | } | 350 | } |
| 238 | 351 | ||
| @@ -246,7 +359,7 @@ static inline int test_time_stamp(u64 delta) | |||
| 246 | return 0; | 359 | return 0; |
| 247 | } | 360 | } |
| 248 | 361 | ||
| 249 | #define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) | 362 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) |
| 250 | 363 | ||
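Concretely (assuming 4 KiB pages and 64-bit longs, so the header is an 8-byte time_stamp plus an 8-byte commit):

	/*
	 * BUF_PAGE_HDR_SIZE = offsetof(struct buffer_data_page, data) = 16
	 * BUF_PAGE_SIZE     = 4096 - 16 = 4080 bytes of event data per page
	 * ring_buffer_page_len(page) = commit + 16  (header included)
	 */

Defining BUF_PAGE_SIZE via BUF_PAGE_HDR_SIZE instead of repeating the offsetof keeps the writer and the read-page copy path in agreement about where data starts.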
| 251 | /* | 364 | /* |
| 252 | * head_page == tail_page && head == tail then buffer is empty. | 365 | * head_page == tail_page && head == tail then buffer is empty. |
| @@ -260,7 +373,7 @@ struct ring_buffer_per_cpu { | |||
| 260 | struct list_head pages; | 373 | struct list_head pages; |
| 261 | struct buffer_page *head_page; /* read from head */ | 374 | struct buffer_page *head_page; /* read from head */ |
| 262 | struct buffer_page *tail_page; /* write to tail */ | 375 | struct buffer_page *tail_page; /* write to tail */ |
| 263 | struct buffer_page *commit_page; /* commited pages */ | 376 | struct buffer_page *commit_page; /* committed pages */ |
| 264 | struct buffer_page *reader_page; | 377 | struct buffer_page *reader_page; |
| 265 | unsigned long overrun; | 378 | unsigned long overrun; |
| 266 | unsigned long entries; | 379 | unsigned long entries; |
| @@ -273,12 +386,17 @@ struct ring_buffer { | |||
| 273 | unsigned pages; | 386 | unsigned pages; |
| 274 | unsigned flags; | 387 | unsigned flags; |
| 275 | int cpus; | 388 | int cpus; |
| 276 | cpumask_var_t cpumask; | ||
| 277 | atomic_t record_disabled; | 389 | atomic_t record_disabled; |
| 390 | cpumask_var_t cpumask; | ||
| 278 | 391 | ||
| 279 | struct mutex mutex; | 392 | struct mutex mutex; |
| 280 | 393 | ||
| 281 | struct ring_buffer_per_cpu **buffers; | 394 | struct ring_buffer_per_cpu **buffers; |
| 395 | |||
| 396 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 397 | struct notifier_block cpu_notify; | ||
| 398 | #endif | ||
| 399 | u64 (*clock)(void); | ||
| 282 | }; | 400 | }; |
| 283 | 401 | ||
| 284 | struct ring_buffer_iter { | 402 | struct ring_buffer_iter { |
| @@ -299,11 +417,35 @@ struct ring_buffer_iter { | |||
| 299 | _____ret; \ | 417 | _____ret; \ |
| 300 | }) | 418 | }) |
| 301 | 419 | ||
| 420 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | ||
| 421 | #define DEBUG_SHIFT 0 | ||
| 422 | |||
| 423 | u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) | ||
| 424 | { | ||
| 425 | u64 time; | ||
| 426 | |||
| 427 | preempt_disable_notrace(); | ||
| 428 | /* shift to debug/test normalization and TIME_EXTENTS */ | ||
| 429 | time = buffer->clock() << DEBUG_SHIFT; | ||
| 430 | preempt_enable_no_resched_notrace(); | ||
| 431 | |||
| 432 | return time; | ||
| 433 | } | ||
| 434 | EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); | ||
| 435 | |||
| 436 | void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | ||
| 437 | int cpu, u64 *ts) | ||
| 438 | { | ||
| 439 | /* Just stupid testing the normalize function and deltas */ | ||
| 440 | *ts >>= DEBUG_SHIFT; | ||
| 441 | } | ||
| 442 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | ||
| 443 | |||
| 302 | /** | 444 | /** |
| 303 | * check_pages - integrity check of buffer pages | 445 | * check_pages - integrity check of buffer pages |
| 304 | * @cpu_buffer: CPU buffer with pages to test | 446 | * @cpu_buffer: CPU buffer with pages to test |
| 305 | * | 447 | * |
| 306 | * As a safty measure we check to make sure the data pages have not | 448 | * As a safety measure we check to make sure the data pages have not |
| 307 | * been corrupted. | 449 | * been corrupted. |
| 308 | */ | 450 | */ |
| 309 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 451 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
| @@ -421,7 +563,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 421 | struct list_head *head = &cpu_buffer->pages; | 563 | struct list_head *head = &cpu_buffer->pages; |
| 422 | struct buffer_page *bpage, *tmp; | 564 | struct buffer_page *bpage, *tmp; |
| 423 | 565 | ||
| 424 | list_del_init(&cpu_buffer->reader_page->list); | ||
| 425 | free_buffer_page(cpu_buffer->reader_page); | 566 | free_buffer_page(cpu_buffer->reader_page); |
| 426 | 567 | ||
| 427 | list_for_each_entry_safe(bpage, tmp, head, list) { | 568 | list_for_each_entry_safe(bpage, tmp, head, list) { |
| @@ -437,6 +578,11 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 437 | */ | 578 | */ |
| 438 | extern int ring_buffer_page_too_big(void); | 579 | extern int ring_buffer_page_too_big(void); |
| 439 | 580 | ||
| 581 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 582 | static int rb_cpu_notify(struct notifier_block *self, | ||
| 583 | unsigned long action, void *hcpu); | ||
| 584 | #endif | ||
| 585 | |||
| 440 | /** | 586 | /** |
| 441 | * ring_buffer_alloc - allocate a new ring_buffer | 587 | * ring_buffer_alloc - allocate a new ring_buffer |
| 442 | * @size: the size in bytes per cpu that is needed. | 588 | * @size: the size in bytes per cpu that is needed. |
| @@ -469,12 +615,23 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
| 469 | 615 | ||
| 470 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 616 | buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
| 471 | buffer->flags = flags; | 617 | buffer->flags = flags; |
| 618 | buffer->clock = trace_clock_local; | ||
| 472 | 619 | ||
| 473 | /* need at least two pages */ | 620 | /* need at least two pages */ |
| 474 | if (buffer->pages == 1) | 621 | if (buffer->pages == 1) |
| 475 | buffer->pages++; | 622 | buffer->pages++; |
| 476 | 623 | ||
| 624 | /* | ||
| 625 | * In case of non-hotplug cpu, if the ring-buffer is allocated | ||
| 626 | * in early initcall, it will not be notified of secondary cpus. | ||
| 627 | * In that off case, we need to allocate for all possible cpus. | ||
| 628 | */ | ||
| 629 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 630 | get_online_cpus(); | ||
| 631 | cpumask_copy(buffer->cpumask, cpu_online_mask); | ||
| 632 | #else | ||
| 477 | cpumask_copy(buffer->cpumask, cpu_possible_mask); | 633 | cpumask_copy(buffer->cpumask, cpu_possible_mask); |
| 634 | #endif | ||
| 478 | buffer->cpus = nr_cpu_ids; | 635 | buffer->cpus = nr_cpu_ids; |
| 479 | 636 | ||
| 480 | bsize = sizeof(void *) * nr_cpu_ids; | 637 | bsize = sizeof(void *) * nr_cpu_ids; |
| @@ -490,6 +647,13 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
| 490 | goto fail_free_buffers; | 647 | goto fail_free_buffers; |
| 491 | } | 648 | } |
| 492 | 649 | ||
| 650 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 651 | buffer->cpu_notify.notifier_call = rb_cpu_notify; | ||
| 652 | buffer->cpu_notify.priority = 0; | ||
| 653 | register_cpu_notifier(&buffer->cpu_notify); | ||
| 654 | #endif | ||
| 655 | |||
| 656 | put_online_cpus(); | ||
| 493 | mutex_init(&buffer->mutex); | 657 | mutex_init(&buffer->mutex); |
| 494 | 658 | ||
| 495 | return buffer; | 659 | return buffer; |
| @@ -503,6 +667,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags) | |||
| 503 | 667 | ||
| 504 | fail_free_cpumask: | 668 | fail_free_cpumask: |
| 505 | free_cpumask_var(buffer->cpumask); | 669 | free_cpumask_var(buffer->cpumask); |
| 670 | put_online_cpus(); | ||
| 506 | 671 | ||
| 507 | fail_free_buffer: | 672 | fail_free_buffer: |
| 508 | kfree(buffer); | 673 | kfree(buffer); |
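A minimal allocation sketch (the size is per CPU and is rounded up to whole pages, with the two-page floor the code above enforces):

	struct ring_buffer *rb;

	rb = ring_buffer_alloc(65536, RB_FL_OVERWRITE);	/* 64 KiB per CPU */
	if (!rb)
		return -ENOMEM;
	/* ... produce and consume events ... */
	ring_buffer_free(rb);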
| @@ -519,15 +684,29 @@ ring_buffer_free(struct ring_buffer *buffer) | |||
| 519 | { | 684 | { |
| 520 | int cpu; | 685 | int cpu; |
| 521 | 686 | ||
| 687 | get_online_cpus(); | ||
| 688 | |||
| 689 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 690 | unregister_cpu_notifier(&buffer->cpu_notify); | ||
| 691 | #endif | ||
| 692 | |||
| 522 | for_each_buffer_cpu(buffer, cpu) | 693 | for_each_buffer_cpu(buffer, cpu) |
| 523 | rb_free_cpu_buffer(buffer->buffers[cpu]); | 694 | rb_free_cpu_buffer(buffer->buffers[cpu]); |
| 524 | 695 | ||
| 696 | put_online_cpus(); | ||
| 697 | |||
| 525 | free_cpumask_var(buffer->cpumask); | 698 | free_cpumask_var(buffer->cpumask); |
| 526 | 699 | ||
| 527 | kfree(buffer); | 700 | kfree(buffer); |
| 528 | } | 701 | } |
| 529 | EXPORT_SYMBOL_GPL(ring_buffer_free); | 702 | EXPORT_SYMBOL_GPL(ring_buffer_free); |
| 530 | 703 | ||
| 704 | void ring_buffer_set_clock(struct ring_buffer *buffer, | ||
| 705 | u64 (*clock)(void)) | ||
| 706 | { | ||
| 707 | buffer->clock = clock; | ||
| 708 | } | ||
| 709 | |||
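ring_buffer_set_clock() lets a tracer substitute its own time source for the trace_clock_local default installed at allocation. For example, a sketch using trace_clock_global, one of the clocks from the trace_clock.h header this patch starts including (a caller would set it before recording starts):

	ring_buffer_set_clock(rb, trace_clock_global);	/* cross-CPU ordered timestamps */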
| 531 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); | 710 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
| 532 | 711 | ||
| 533 | static void | 712 | static void |
| @@ -627,16 +806,15 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 627 | return size; | 806 | return size; |
| 628 | 807 | ||
| 629 | mutex_lock(&buffer->mutex); | 808 | mutex_lock(&buffer->mutex); |
| 809 | get_online_cpus(); | ||
| 630 | 810 | ||
| 631 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 811 | nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
| 632 | 812 | ||
| 633 | if (size < buffer_size) { | 813 | if (size < buffer_size) { |
| 634 | 814 | ||
| 635 | /* easy case, just free pages */ | 815 | /* easy case, just free pages */ |
| 636 | if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { | 816 | if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) |
| 637 | mutex_unlock(&buffer->mutex); | 817 | goto out_fail; |
| 638 | return -1; | ||
| 639 | } | ||
| 640 | 818 | ||
| 641 | rm_pages = buffer->pages - nr_pages; | 819 | rm_pages = buffer->pages - nr_pages; |
| 642 | 820 | ||
| @@ -655,10 +833,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 655 | * add these pages to the cpu_buffers. Otherwise we just free | 833 | * add these pages to the cpu_buffers. Otherwise we just free |
| 656 | * them all and return -ENOMEM; | 834 | * them all and return -ENOMEM; |
| 657 | */ | 835 | */ |
| 658 | if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { | 836 | if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) |
| 659 | mutex_unlock(&buffer->mutex); | 837 | goto out_fail; |
| 660 | return -1; | ||
| 661 | } | ||
| 662 | 838 | ||
| 663 | new_pages = nr_pages - buffer->pages; | 839 | new_pages = nr_pages - buffer->pages; |
| 664 | 840 | ||
| @@ -683,13 +859,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 683 | rb_insert_pages(cpu_buffer, &pages, new_pages); | 859 | rb_insert_pages(cpu_buffer, &pages, new_pages); |
| 684 | } | 860 | } |
| 685 | 861 | ||
| 686 | if (RB_WARN_ON(buffer, !list_empty(&pages))) { | 862 | if (RB_WARN_ON(buffer, !list_empty(&pages))) |
| 687 | mutex_unlock(&buffer->mutex); | 863 | goto out_fail; |
| 688 | return -1; | ||
| 689 | } | ||
| 690 | 864 | ||
| 691 | out: | 865 | out: |
| 692 | buffer->pages = nr_pages; | 866 | buffer->pages = nr_pages; |
| 867 | put_online_cpus(); | ||
| 693 | mutex_unlock(&buffer->mutex); | 868 | mutex_unlock(&buffer->mutex); |
| 694 | 869 | ||
| 695 | return size; | 870 | return size; |
| @@ -699,15 +874,20 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
| 699 | list_del_init(&bpage->list); | 874 | list_del_init(&bpage->list); |
| 700 | free_buffer_page(bpage); | 875 | free_buffer_page(bpage); |
| 701 | } | 876 | } |
| 877 | put_online_cpus(); | ||
| 702 | mutex_unlock(&buffer->mutex); | 878 | mutex_unlock(&buffer->mutex); |
| 703 | return -ENOMEM; | 879 | return -ENOMEM; |
| 704 | } | ||
| 705 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | ||
| 706 | 880 | ||
| 707 | static inline int rb_null_event(struct ring_buffer_event *event) | 881 | /* |
| 708 | { | 882 | * Something went totally wrong, and we are too paranoid |
| 709 | return event->type == RINGBUF_TYPE_PADDING; | 883 | * to even clean up the mess. |
| 884 | */ | ||
| 885 | out_fail: | ||
| 886 | put_online_cpus(); | ||
| 887 | mutex_unlock(&buffer->mutex); | ||
| 888 | return -1; | ||
| 710 | } | 889 | } |
| 890 | EXPORT_SYMBOL_GPL(ring_buffer_resize); | ||
| 711 | 891 | ||
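With every failure path now routed through out_fail, callers of the resize see a simple contract: the new size on success, negative on failure. A hedged sketch:

	int ret;

	ret = ring_buffer_resize(rb, 128 * 1024);	/* grow to 128 KiB per CPU */
	if (ret < 0)
		printk(KERN_WARNING "ring buffer resize failed: %d\n", ret);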
| 712 | static inline void * | 892 | static inline void * |
| 713 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) | 893 | __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) |
| @@ -811,7 +991,7 @@ rb_event_index(struct ring_buffer_event *event) | |||
| 811 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 991 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); |
| 812 | } | 992 | } |
| 813 | 993 | ||
| 814 | static inline int | 994 | static int |
| 815 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 995 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
| 816 | struct ring_buffer_event *event) | 996 | struct ring_buffer_event *event) |
| 817 | { | 997 | { |
| @@ -825,7 +1005,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 825 | rb_commit_index(cpu_buffer) == index; | 1005 | rb_commit_index(cpu_buffer) == index; |
| 826 | } | 1006 | } |
| 827 | 1007 | ||
| 828 | static inline void | 1008 | static void |
| 829 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | 1009 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, |
| 830 | struct ring_buffer_event *event) | 1010 | struct ring_buffer_event *event) |
| 831 | { | 1011 | { |
| @@ -850,7 +1030,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 850 | local_set(&cpu_buffer->commit_page->page->commit, index); | 1030 | local_set(&cpu_buffer->commit_page->page->commit, index); |
| 851 | } | 1031 | } |
| 852 | 1032 | ||
| 853 | static inline void | 1033 | static void |
| 854 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 1034 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
| 855 | { | 1035 | { |
| 856 | /* | 1036 | /* |
| @@ -896,7 +1076,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 896 | cpu_buffer->reader_page->read = 0; | 1076 | cpu_buffer->reader_page->read = 0; |
| 897 | } | 1077 | } |
| 898 | 1078 | ||
| 899 | static inline void rb_inc_iter(struct ring_buffer_iter *iter) | 1079 | static void rb_inc_iter(struct ring_buffer_iter *iter) |
| 900 | { | 1080 | { |
| 901 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1081 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
| 902 | 1082 | ||
| @@ -926,7 +1106,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) | |||
| 926 | * and with this, we can determine what to place into the | 1106 | * and with this, we can determine what to place into the |
| 927 | * data field. | 1107 | * data field. |
| 928 | */ | 1108 | */ |
| 929 | static inline void | 1109 | static void |
| 930 | rb_update_event(struct ring_buffer_event *event, | 1110 | rb_update_event(struct ring_buffer_event *event, |
| 931 | unsigned type, unsigned length) | 1111 | unsigned type, unsigned length) |
| 932 | { | 1112 | { |
| @@ -938,15 +1118,11 @@ rb_update_event(struct ring_buffer_event *event, | |||
| 938 | break; | 1118 | break; |
| 939 | 1119 | ||
| 940 | case RINGBUF_TYPE_TIME_EXTEND: | 1120 | case RINGBUF_TYPE_TIME_EXTEND: |
| 941 | event->len = | 1121 | event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); |
| 942 | (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) | ||
| 943 | >> RB_ALIGNMENT_SHIFT; | ||
| 944 | break; | 1122 | break; |
| 945 | 1123 | ||
| 946 | case RINGBUF_TYPE_TIME_STAMP: | 1124 | case RINGBUF_TYPE_TIME_STAMP: |
| 947 | event->len = | 1125 | event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); |
| 948 | (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) | ||
| 949 | >> RB_ALIGNMENT_SHIFT; | ||
| 950 | break; | 1126 | break; |
| 951 | 1127 | ||
| 952 | case RINGBUF_TYPE_DATA: | 1128 | case RINGBUF_TYPE_DATA: |
| @@ -955,16 +1131,14 @@ rb_update_event(struct ring_buffer_event *event, | |||
| 955 | event->len = 0; | 1131 | event->len = 0; |
| 956 | event->array[0] = length; | 1132 | event->array[0] = length; |
| 957 | } else | 1133 | } else |
| 958 | event->len = | 1134 | event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); |
| 959 | (length + (RB_ALIGNMENT-1)) | ||
| 960 | >> RB_ALIGNMENT_SHIFT; | ||
| 961 | break; | 1135 | break; |
| 962 | default: | 1136 | default: |
| 963 | BUG(); | 1137 | BUG(); |
| 964 | } | 1138 | } |
| 965 | } | 1139 | } |
| 966 | 1140 | ||
| 967 | static inline unsigned rb_calculate_event_length(unsigned length) | 1141 | static unsigned rb_calculate_event_length(unsigned length) |
| 968 | { | 1142 | { |
| 969 | struct ring_buffer_event event; /* Used only for sizeof array */ | 1143 | struct ring_buffer_event event; /* Used only for sizeof array */ |
| 970 | 1144 | ||
| @@ -990,6 +1164,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 990 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1164 | struct ring_buffer *buffer = cpu_buffer->buffer; |
| 991 | struct ring_buffer_event *event; | 1165 | struct ring_buffer_event *event; |
| 992 | unsigned long flags; | 1166 | unsigned long flags; |
| 1167 | bool lock_taken = false; | ||
| 993 | 1168 | ||
| 994 | commit_page = cpu_buffer->commit_page; | 1169 | commit_page = cpu_buffer->commit_page; |
| 995 | /* we just need to protect against interrupts */ | 1170 | /* we just need to protect against interrupts */ |
| @@ -1003,7 +1178,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1003 | struct buffer_page *next_page = tail_page; | 1178 | struct buffer_page *next_page = tail_page; |
| 1004 | 1179 | ||
| 1005 | local_irq_save(flags); | 1180 | local_irq_save(flags); |
| 1006 | __raw_spin_lock(&cpu_buffer->lock); | 1181 | /* |
| 1182 | * Since the write to the buffer is still not | ||
| 1183 | * fully lockless, we must be careful with NMIs. | ||
| 1184 | * The locks in the writers are taken when a write | ||
| 1185 | * crosses to a new page. The locks protect against | ||
| 1186 | * races with the readers (this will soon be fixed | ||
| 1187 | * with a lockless solution). | ||
| 1188 | * | ||
| 1189 | * Because we can not protect against NMIs, and we | ||
| 1190 | * want to keep traces reentrant, we need to manage | ||
| 1191 | * what happens when we are in an NMI. | ||
| 1192 | * | ||
| 1193 | * NMIs can happen after we take the lock. | ||
| 1194 | * If we are in an NMI, only take the lock | ||
| 1195 | * if it is not already taken. Otherwise | ||
| 1196 | * simply fail. | ||
| 1197 | */ | ||
| 1198 | if (unlikely(in_nmi())) { | ||
| 1199 | if (!__raw_spin_trylock(&cpu_buffer->lock)) | ||
| 1200 | goto out_reset; | ||
| 1201 | } else | ||
| 1202 | __raw_spin_lock(&cpu_buffer->lock); | ||
| 1203 | |||
| 1204 | lock_taken = true; | ||
| 1007 | 1205 | ||
| 1008 | rb_inc_page(cpu_buffer, &next_page); | 1206 | rb_inc_page(cpu_buffer, &next_page); |
| 1009 | 1207 | ||
| @@ -1012,7 +1210,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1012 | 1210 | ||
| 1013 | /* we grabbed the lock before incrementing */ | 1211 | /* we grabbed the lock before incrementing */ |
| 1014 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | 1212 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) |
| 1015 | goto out_unlock; | 1213 | goto out_reset; |
| 1016 | 1214 | ||
| 1017 | /* | 1215 | /* |
| 1018 | * If for some reason, we had an interrupt storm that made | 1216 | * If for some reason, we had an interrupt storm that made |
| @@ -1021,12 +1219,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1021 | */ | 1219 | */ |
| 1022 | if (unlikely(next_page == commit_page)) { | 1220 | if (unlikely(next_page == commit_page)) { |
| 1023 | WARN_ON_ONCE(1); | 1221 | WARN_ON_ONCE(1); |
| 1024 | goto out_unlock; | 1222 | goto out_reset; |
| 1025 | } | 1223 | } |
| 1026 | 1224 | ||
| 1027 | if (next_page == head_page) { | 1225 | if (next_page == head_page) { |
| 1028 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1226 | if (!(buffer->flags & RB_FL_OVERWRITE)) |
| 1029 | goto out_unlock; | 1227 | goto out_reset; |
| 1030 | 1228 | ||
| 1031 | /* tail_page has not moved yet? */ | 1229 | /* tail_page has not moved yet? */ |
| 1032 | if (tail_page == cpu_buffer->tail_page) { | 1230 | if (tail_page == cpu_buffer->tail_page) { |
| @@ -1050,7 +1248,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1050 | cpu_buffer->tail_page = next_page; | 1248 | cpu_buffer->tail_page = next_page; |
| 1051 | 1249 | ||
| 1052 | /* reread the time stamp */ | 1250 | /* reread the time stamp */ |
| 1053 | *ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1251 | *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); |
| 1054 | cpu_buffer->tail_page->page->time_stamp = *ts; | 1252 | cpu_buffer->tail_page->page->time_stamp = *ts; |
| 1055 | } | 1253 | } |
| 1056 | 1254 | ||
| @@ -1060,7 +1258,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1060 | if (tail < BUF_PAGE_SIZE) { | 1258 | if (tail < BUF_PAGE_SIZE) { |
| 1061 | /* Mark the rest of the page with padding */ | 1259 | /* Mark the rest of the page with padding */ |
| 1062 | event = __rb_page_index(tail_page, tail); | 1260 | event = __rb_page_index(tail_page, tail); |
| 1063 | event->type = RINGBUF_TYPE_PADDING; | 1261 | rb_event_set_padding(event); |
| 1064 | } | 1262 | } |
| 1065 | 1263 | ||
| 1066 | if (tail <= BUF_PAGE_SIZE) | 1264 | if (tail <= BUF_PAGE_SIZE) |
| @@ -1100,12 +1298,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1100 | 1298 | ||
| 1101 | return event; | 1299 | return event; |
| 1102 | 1300 | ||
| 1103 | out_unlock: | 1301 | out_reset: |
| 1104 | /* reset write */ | 1302 | /* reset write */ |
| 1105 | if (tail <= BUF_PAGE_SIZE) | 1303 | if (tail <= BUF_PAGE_SIZE) |
| 1106 | local_set(&tail_page->write, tail); | 1304 | local_set(&tail_page->write, tail); |
| 1107 | 1305 | ||
| 1108 | __raw_spin_unlock(&cpu_buffer->lock); | 1306 | if (likely(lock_taken)) |
| 1307 | __raw_spin_unlock(&cpu_buffer->lock); | ||
| 1109 | local_irq_restore(flags); | 1308 | local_irq_restore(flags); |
| 1110 | return NULL; | 1309 | return NULL; |
| 1111 | } | 1310 | } |
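Stripped of the surrounding buffer logic, the locking pattern the new comment describes looks like this (an illustrative restatement of the code above, not additional code from the patch):

	bool lock_taken = false;
	unsigned long flags;

	local_irq_save(flags);
	if (unlikely(in_nmi())) {
		/* An NMI may have interrupted the current lock holder on
		 * this CPU; spinning here would deadlock, so only try. */
		if (!__raw_spin_trylock(&lock))
			goto out_reset;
	} else
		__raw_spin_lock(&lock);
	lock_taken = true;
	/* ... cross to the next page ... */
out_reset:
	if (likely(lock_taken))
		__raw_spin_unlock(&lock);
	local_irq_restore(flags);

The lock_taken flag matters because the failure path is shared: only the path that actually acquired the lock may release it.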
| @@ -1192,7 +1391,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1192 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) | 1391 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) |
| 1193 | return NULL; | 1392 | return NULL; |
| 1194 | 1393 | ||
| 1195 | ts = ring_buffer_time_stamp(cpu_buffer->cpu); | 1394 | ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); |
| 1196 | 1395 | ||
| 1197 | /* | 1396 | /* |
| 1198 | * Only the first commit can update the timestamp. | 1397 | * Only the first commit can update the timestamp. |
| @@ -1265,7 +1464,6 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
| 1265 | * ring_buffer_lock_reserve - reserve a part of the buffer | 1464 | * ring_buffer_lock_reserve - reserve a part of the buffer |
| 1266 | * @buffer: the ring buffer to reserve from | 1465 | * @buffer: the ring buffer to reserve from |
| 1267 | * @length: the length of the data to reserve (excluding event header) | 1466 | * @length: the length of the data to reserve (excluding event header) |
| 1268 | * @flags: a pointer to save the interrupt flags | ||
| 1269 | * | 1467 | * |
| 1270 | * Returns a reserved event on the ring buffer to copy directly to. | 1468 |
| 1271 | * The user of this interface will need to get the body to write into | 1469 | * The user of this interface will need to get the body to write into |
| @@ -1278,9 +1476,7 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
| 1278 | * If NULL is returned, then nothing has been allocated or locked. | 1476 | * If NULL is returned, then nothing has been allocated or locked. |
| 1279 | */ | 1477 | */ |
| 1280 | struct ring_buffer_event * | 1478 | struct ring_buffer_event * |
| 1281 | ring_buffer_lock_reserve(struct ring_buffer *buffer, | 1479 | ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) |
| 1282 | unsigned long length, | ||
| 1283 | unsigned long *flags) | ||
| 1284 | { | 1480 | { |
| 1285 | struct ring_buffer_per_cpu *cpu_buffer; | 1481 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1286 | struct ring_buffer_event *event; | 1482 | struct ring_buffer_event *event; |
| @@ -1347,15 +1543,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1347 | * ring_buffer_unlock_commit - commit a reserved | 1543 | * ring_buffer_unlock_commit - commit a reserved |
| 1348 | * @buffer: The buffer to commit to | 1544 | * @buffer: The buffer to commit to |
| 1349 | * @event: The event pointer to commit. | 1545 | * @event: The event pointer to commit. |
| 1350 | * @flags: the interrupt flags received from ring_buffer_lock_reserve. | ||
| 1351 | * | 1546 | * |
| 1352 | * This commits the data to the ring buffer, and releases any locks held. | 1547 | * This commits the data to the ring buffer, and releases any locks held. |
| 1353 | * | 1548 | * |
| 1354 | * Must be paired with ring_buffer_lock_reserve. | 1549 | * Must be paired with ring_buffer_lock_reserve. |
| 1355 | */ | 1550 | */ |
| 1356 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 1551 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
| 1357 | struct ring_buffer_event *event, | 1552 | struct ring_buffer_event *event) |
| 1358 | unsigned long flags) | ||
| 1359 | { | 1553 | { |
| 1360 | struct ring_buffer_per_cpu *cpu_buffer; | 1554 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1361 | int cpu = raw_smp_processor_id(); | 1555 | int cpu = raw_smp_processor_id(); |
| @@ -1438,7 +1632,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
| 1438 | } | 1632 | } |
| 1439 | EXPORT_SYMBOL_GPL(ring_buffer_write); | 1633 | EXPORT_SYMBOL_GPL(ring_buffer_write); |
| 1440 | 1634 | ||
| 1441 | static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 1635 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
| 1442 | { | 1636 | { |
| 1443 | struct buffer_page *reader = cpu_buffer->reader_page; | 1637 | struct buffer_page *reader = cpu_buffer->reader_page; |
| 1444 | struct buffer_page *head = cpu_buffer->head_page; | 1638 | struct buffer_page *head = cpu_buffer->head_page; |
| @@ -1528,12 +1722,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
| 1528 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 1722 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
| 1529 | { | 1723 | { |
| 1530 | struct ring_buffer_per_cpu *cpu_buffer; | 1724 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1725 | unsigned long ret; | ||
| 1531 | 1726 | ||
| 1532 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 1727 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 1533 | return 0; | 1728 | return 0; |
| 1534 | 1729 | ||
| 1535 | cpu_buffer = buffer->buffers[cpu]; | 1730 | cpu_buffer = buffer->buffers[cpu]; |
| 1536 | return cpu_buffer->entries; | 1731 | ret = cpu_buffer->entries; |
| 1732 | |||
| 1733 | return ret; | ||
| 1537 | } | 1734 | } |
| 1538 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 1735 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
| 1539 | 1736 | ||
| @@ -1545,12 +1742,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | |||
| 1545 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) | 1742 | unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) |
| 1546 | { | 1743 | { |
| 1547 | struct ring_buffer_per_cpu *cpu_buffer; | 1744 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1745 | unsigned long ret; | ||
| 1548 | 1746 | ||
| 1549 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 1747 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 1550 | return 0; | 1748 | return 0; |
| 1551 | 1749 | ||
| 1552 | cpu_buffer = buffer->buffers[cpu]; | 1750 | cpu_buffer = buffer->buffers[cpu]; |
| 1553 | return cpu_buffer->overrun; | 1751 | ret = cpu_buffer->overrun; |
| 1752 | |||
| 1753 | return ret; | ||
| 1554 | } | 1754 | } |
| 1555 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); | 1755 | EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); |
| 1556 | 1756 | ||
| @@ -1627,9 +1827,14 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) | |||
| 1627 | */ | 1827 | */ |
| 1628 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) | 1828 | void ring_buffer_iter_reset(struct ring_buffer_iter *iter) |
| 1629 | { | 1829 | { |
| 1630 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1830 | struct ring_buffer_per_cpu *cpu_buffer; |
| 1631 | unsigned long flags; | 1831 | unsigned long flags; |
| 1632 | 1832 | ||
| 1833 | if (!iter) | ||
| 1834 | return; | ||
| 1835 | |||
| 1836 | cpu_buffer = iter->cpu_buffer; | ||
| 1837 | |||
| 1633 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 1838 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 1634 | rb_iter_reset(iter); | 1839 | rb_iter_reset(iter); |
| 1635 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 1840 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| @@ -1803,7 +2008,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) | |||
| 1803 | 2008 | ||
| 1804 | event = rb_reader_event(cpu_buffer); | 2009 | event = rb_reader_event(cpu_buffer); |
| 1805 | 2010 | ||
| 1806 | if (event->type == RINGBUF_TYPE_DATA) | 2011 | if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) |
| 1807 | cpu_buffer->entries--; | 2012 | cpu_buffer->entries--; |
| 1808 | 2013 | ||
| 1809 | rb_update_read_stamp(cpu_buffer, event); | 2014 | rb_update_read_stamp(cpu_buffer, event); |
| @@ -1864,9 +2069,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1864 | struct buffer_page *reader; | 2069 | struct buffer_page *reader; |
| 1865 | int nr_loops = 0; | 2070 | int nr_loops = 0; |
| 1866 | 2071 | ||
| 1867 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
| 1868 | return NULL; | ||
| 1869 | |||
| 1870 | cpu_buffer = buffer->buffers[cpu]; | 2072 | cpu_buffer = buffer->buffers[cpu]; |
| 1871 | 2073 | ||
| 1872 | again: | 2074 | again: |
| @@ -1889,9 +2091,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1889 | 2091 | ||
| 1890 | switch (event->type) { | 2092 | switch (event->type) { |
| 1891 | case RINGBUF_TYPE_PADDING: | 2093 | case RINGBUF_TYPE_PADDING: |
| 1892 | RB_WARN_ON(cpu_buffer, 1); | 2094 | if (rb_null_event(event)) |
| 2095 | RB_WARN_ON(cpu_buffer, 1); | ||
| 2096 | /* | ||
| 2097 | * Because the writer could be discarding every | ||
| 2098 | * event it creates (which would probably be bad) | ||
| 2099 | * if we were to go back to "again" then we may never | ||
| 2100 | * catch up, and will trigger the warn on, or lock | ||
| 2101 | * the box. Return the padding, and we will release | ||
| 2102 | * the current locks, and try again. | ||
| 2103 | */ | ||
| 1893 | rb_advance_reader(cpu_buffer); | 2104 | rb_advance_reader(cpu_buffer); |
| 1894 | return NULL; | 2105 | return event; |
| 1895 | 2106 | ||
| 1896 | case RINGBUF_TYPE_TIME_EXTEND: | 2107 | case RINGBUF_TYPE_TIME_EXTEND: |
| 1897 | /* Internal data, OK to advance */ | 2108 | /* Internal data, OK to advance */ |
| @@ -1906,7 +2117,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1906 | case RINGBUF_TYPE_DATA: | 2117 | case RINGBUF_TYPE_DATA: |
| 1907 | if (ts) { | 2118 | if (ts) { |
| 1908 | *ts = cpu_buffer->read_stamp + event->time_delta; | 2119 | *ts = cpu_buffer->read_stamp + event->time_delta; |
| 1909 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 2120 | ring_buffer_normalize_time_stamp(buffer, |
| 2121 | cpu_buffer->cpu, ts); | ||
| 1910 | } | 2122 | } |
| 1911 | return event; | 2123 | return event; |
| 1912 | 2124 | ||
| @@ -1951,8 +2163,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 1951 | 2163 | ||
| 1952 | switch (event->type) { | 2164 | switch (event->type) { |
| 1953 | case RINGBUF_TYPE_PADDING: | 2165 | case RINGBUF_TYPE_PADDING: |
| 1954 | rb_inc_iter(iter); | 2166 | if (rb_null_event(event)) { |
| 1955 | goto again; | 2167 | rb_inc_iter(iter); |
| 2168 | goto again; | ||
| 2169 | } | ||
| 2170 | rb_advance_iter(iter); | ||
| 2171 | return event; | ||
| 1956 | 2172 | ||
| 1957 | case RINGBUF_TYPE_TIME_EXTEND: | 2173 | case RINGBUF_TYPE_TIME_EXTEND: |
| 1958 | /* Internal data, OK to advance */ | 2174 | /* Internal data, OK to advance */ |
| @@ -1967,7 +2183,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 1967 | case RINGBUF_TYPE_DATA: | 2183 | case RINGBUF_TYPE_DATA: |
| 1968 | if (ts) { | 2184 | if (ts) { |
| 1969 | *ts = iter->read_stamp + event->time_delta; | 2185 | *ts = iter->read_stamp + event->time_delta; |
| 1970 | ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); | 2186 | ring_buffer_normalize_time_stamp(buffer, |
| 2187 | cpu_buffer->cpu, ts); | ||
| 1971 | } | 2188 | } |
| 1972 | return event; | 2189 | return event; |
| 1973 | 2190 | ||
| @@ -1995,10 +2212,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
| 1995 | struct ring_buffer_event *event; | 2212 | struct ring_buffer_event *event; |
| 1996 | unsigned long flags; | 2213 | unsigned long flags; |
| 1997 | 2214 | ||
| 2215 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
| 2216 | return NULL; | ||
| 2217 | |||
| 2218 | again: | ||
| 1998 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2219 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 1999 | event = rb_buffer_peek(buffer, cpu, ts); | 2220 | event = rb_buffer_peek(buffer, cpu, ts); |
| 2000 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2221 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2001 | 2222 | ||
| 2223 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2224 | cpu_relax(); | ||
| 2225 | goto again; | ||
| 2226 | } | ||
| 2227 | |||
| 2002 | return event; | 2228 | return event; |
| 2003 | } | 2229 | } |
| 2004 | 2230 | ||
| @@ -2017,10 +2243,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2017 | struct ring_buffer_event *event; | 2243 | struct ring_buffer_event *event; |
| 2018 | unsigned long flags; | 2244 | unsigned long flags; |
| 2019 | 2245 | ||
| 2246 | again: | ||
| 2020 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2247 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2021 | event = rb_iter_peek(iter, ts); | 2248 | event = rb_iter_peek(iter, ts); |
| 2022 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2249 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2023 | 2250 | ||
| 2251 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2252 | cpu_relax(); | ||
| 2253 | goto again; | ||
| 2254 | } | ||
| 2255 | |||
| 2024 | return event; | 2256 | return event; |
| 2025 | } | 2257 | } |
| 2026 | 2258 | ||
| @@ -2035,24 +2267,37 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2035 | struct ring_buffer_event * | 2267 | struct ring_buffer_event * |
| 2036 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | 2268 | ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) |
| 2037 | { | 2269 | { |
| 2038 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2270 | struct ring_buffer_per_cpu *cpu_buffer; |
| 2039 | struct ring_buffer_event *event; | 2271 | struct ring_buffer_event *event = NULL; |
| 2040 | unsigned long flags; | 2272 | unsigned long flags; |
| 2041 | 2273 | ||
| 2274 | again: | ||
| 2275 | /* might be called in atomic */ | ||
| 2276 | preempt_disable(); | ||
| 2277 | |||
| 2042 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2278 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 2043 | return NULL; | 2279 | goto out; |
| 2044 | 2280 | ||
| 2281 | cpu_buffer = buffer->buffers[cpu]; | ||
| 2045 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2282 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2046 | 2283 | ||
| 2047 | event = rb_buffer_peek(buffer, cpu, ts); | 2284 | event = rb_buffer_peek(buffer, cpu, ts); |
| 2048 | if (!event) | 2285 | if (!event) |
| 2049 | goto out; | 2286 | goto out_unlock; |
| 2050 | 2287 | ||
| 2051 | rb_advance_reader(cpu_buffer); | 2288 | rb_advance_reader(cpu_buffer); |
| 2052 | 2289 | ||
| 2053 | out: | 2290 | out_unlock: |
| 2054 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2291 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2055 | 2292 | ||
| 2293 | out: | ||
| 2294 | preempt_enable(); | ||
| 2295 | |||
| 2296 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2297 | cpu_relax(); | ||
| 2298 | goto again; | ||
| 2299 | } | ||
| 2300 | |||
| 2056 | return event; | 2301 | return event; |
| 2057 | } | 2302 | } |
| 2058 | EXPORT_SYMBOL_GPL(ring_buffer_consume); | 2303 | EXPORT_SYMBOL_GPL(ring_buffer_consume); |
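Because the peek/consume wrappers now retry past padding internally, a consumer can stay oblivious to discarded events. A hypothetical drain loop:

	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts)))
		process_record(ring_buffer_event_data(event),
			       ring_buffer_event_length(event), ts);	/* hypothetical consumer */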
| @@ -2131,6 +2376,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2131 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 2376 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
| 2132 | unsigned long flags; | 2377 | unsigned long flags; |
| 2133 | 2378 | ||
| 2379 | again: | ||
| 2134 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2380 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
| 2135 | event = rb_iter_peek(iter, ts); | 2381 | event = rb_iter_peek(iter, ts); |
| 2136 | if (!event) | 2382 | if (!event) |
| @@ -2140,6 +2386,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
| 2140 | out: | 2386 | out: |
| 2141 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2387 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
| 2142 | 2388 | ||
| 2389 | if (event && event->type == RINGBUF_TYPE_PADDING) { | ||
| 2390 | cpu_relax(); | ||
| 2391 | goto again; | ||
| 2392 | } | ||
| 2393 | |||
| 2143 | return event; | 2394 | return event; |
| 2144 | } | 2395 | } |
| 2145 | EXPORT_SYMBOL_GPL(ring_buffer_read); | 2396 | EXPORT_SYMBOL_GPL(ring_buffer_read); |
| @@ -2232,6 +2483,7 @@ int ring_buffer_empty(struct ring_buffer *buffer) | |||
| 2232 | if (!rb_per_cpu_empty(cpu_buffer)) | 2483 | if (!rb_per_cpu_empty(cpu_buffer)) |
| 2233 | return 0; | 2484 | return 0; |
| 2234 | } | 2485 | } |
| 2486 | |||
| 2235 | return 1; | 2487 | return 1; |
| 2236 | } | 2488 | } |
| 2237 | EXPORT_SYMBOL_GPL(ring_buffer_empty); | 2489 | EXPORT_SYMBOL_GPL(ring_buffer_empty); |
| @@ -2244,12 +2496,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); | |||
| 2244 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | 2496 | int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) |
| 2245 | { | 2497 | { |
| 2246 | struct ring_buffer_per_cpu *cpu_buffer; | 2498 | struct ring_buffer_per_cpu *cpu_buffer; |
| 2499 | int ret; | ||
| 2247 | 2500 | ||
| 2248 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2501 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
| 2249 | return 1; | 2502 | return 1; |
| 2250 | 2503 | ||
| 2251 | cpu_buffer = buffer->buffers[cpu]; | 2504 | cpu_buffer = buffer->buffers[cpu]; |
| 2252 | return rb_per_cpu_empty(cpu_buffer); | 2505 | ret = rb_per_cpu_empty(cpu_buffer); |
| 2506 | |||
| 2507 | |||
| 2508 | return ret; | ||
| 2253 | } | 2509 | } |
| 2254 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); | 2510 | EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); |
| 2255 | 2511 | ||
| @@ -2268,18 +2524,36 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
| 2268 | { | 2524 | { |
| 2269 | struct ring_buffer_per_cpu *cpu_buffer_a; | 2525 | struct ring_buffer_per_cpu *cpu_buffer_a; |
| 2270 | struct ring_buffer_per_cpu *cpu_buffer_b; | 2526 | struct ring_buffer_per_cpu *cpu_buffer_b; |
| 2527 | int ret = -EINVAL; | ||
| 2271 | 2528 | ||
| 2272 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || | 2529 | if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || |
| 2273 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) | 2530 | !cpumask_test_cpu(cpu, buffer_b->cpumask)) |
| 2274 | return -EINVAL; | 2531 | goto out; |
| 2275 | 2532 | ||
| 2276 | /* At least make sure the two buffers are somewhat the same */ | 2533 | /* At least make sure the two buffers are somewhat the same */ |
| 2277 | if (buffer_a->pages != buffer_b->pages) | 2534 | if (buffer_a->pages != buffer_b->pages) |
| 2278 | return -EINVAL; | 2535 | goto out; |
| 2536 | |||
| 2537 | ret = -EAGAIN; | ||
| 2538 | |||
| 2539 | if (ring_buffer_flags != RB_BUFFERS_ON) | ||
| 2540 | goto out; | ||
| 2541 | |||
| 2542 | if (atomic_read(&buffer_a->record_disabled)) | ||
| 2543 | goto out; | ||
| 2544 | |||
| 2545 | if (atomic_read(&buffer_b->record_disabled)) | ||
| 2546 | goto out; | ||
| 2279 | 2547 | ||
| 2280 | cpu_buffer_a = buffer_a->buffers[cpu]; | 2548 | cpu_buffer_a = buffer_a->buffers[cpu]; |
| 2281 | cpu_buffer_b = buffer_b->buffers[cpu]; | 2549 | cpu_buffer_b = buffer_b->buffers[cpu]; |
| 2282 | 2550 | ||
| 2551 | if (atomic_read(&cpu_buffer_a->record_disabled)) | ||
| 2552 | goto out; | ||
| 2553 | |||
| 2554 | if (atomic_read(&cpu_buffer_b->record_disabled)) | ||
| 2555 | goto out; | ||
| 2556 | |||
| 2283 | /* | 2557 | /* |
| 2284 | * We can't do a synchronize_sched here because this | 2558 | * We can't do a synchronize_sched here because this |
| 2285 | * function can be called in atomic context. | 2559 | * function can be called in atomic context. |
| @@ -2298,18 +2572,21 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
| 2298 | atomic_dec(&cpu_buffer_a->record_disabled); | 2572 | atomic_dec(&cpu_buffer_a->record_disabled); |
| 2299 | atomic_dec(&cpu_buffer_b->record_disabled); | 2573 | atomic_dec(&cpu_buffer_b->record_disabled); |
| 2300 | 2574 | ||
| 2301 | return 0; | 2575 | ret = 0; |
| 2576 | out: | ||
| 2577 | return ret; | ||
| 2302 | } | 2578 | } |
| 2303 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 2579 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
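This is the primitive behind latency-tracer snapshots: exchange one CPU's live buffer with a spare, same-sized "max" buffer. A hedged sketch:

	if (ring_buffer_swap_cpu(max_rb, live_rb, cpu) == 0) {
		/* max_rb now holds the events live_rb had on this cpu;
		 * recording continues into the pages max_rb gave up. */
	}

The -EAGAIN checks added here refuse the swap while either buffer has recording disabled, since the function cannot synchronize_sched() in atomic context.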
| 2304 | 2580 | ||
| 2305 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | 2581 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, |
| 2306 | struct buffer_data_page *bpage) | 2582 | struct buffer_data_page *bpage, |
| 2583 | unsigned int offset) | ||
| 2307 | { | 2584 | { |
| 2308 | struct ring_buffer_event *event; | 2585 | struct ring_buffer_event *event; |
| 2309 | unsigned long head; | 2586 | unsigned long head; |
| 2310 | 2587 | ||
| 2311 | __raw_spin_lock(&cpu_buffer->lock); | 2588 | __raw_spin_lock(&cpu_buffer->lock); |
| 2312 | for (head = 0; head < local_read(&bpage->commit); | 2589 | for (head = offset; head < local_read(&bpage->commit); |
| 2313 | head += rb_event_length(event)) { | 2590 | head += rb_event_length(event)) { |
| 2314 | 2591 | ||
| 2315 | event = __rb_data_page_index(bpage, head); | 2592 | event = __rb_data_page_index(bpage, head); |
| @@ -2340,8 +2617,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 2340 | */ | 2617 | */ |
| 2341 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 2618 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) |
| 2342 | { | 2619 | { |
| 2343 | unsigned long addr; | ||
| 2344 | struct buffer_data_page *bpage; | 2620 | struct buffer_data_page *bpage; |
| 2621 | unsigned long addr; | ||
| 2345 | 2622 | ||
| 2346 | addr = __get_free_page(GFP_KERNEL); | 2623 | addr = __get_free_page(GFP_KERNEL); |
| 2347 | if (!addr) | 2624 | if (!addr) |
| @@ -2349,6 +2626,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | |||
| 2349 | 2626 | ||
| 2350 | bpage = (void *)addr; | 2627 | bpage = (void *)addr; |
| 2351 | 2628 | ||
| 2629 | rb_init_page(bpage); | ||
| 2630 | |||
| 2352 | return bpage; | 2631 | return bpage; |
| 2353 | } | 2632 | } |
| 2354 | 2633 | ||
@@ -2368,6 +2647,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * ring_buffer_read_page - extract a page from the ring buffer
  * @buffer: buffer to extract from
  * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @len: amount to extract
  * @cpu: the cpu of the buffer to extract
  * @full: should the extraction only happen when the page is full.
  *
@@ -2377,12 +2657,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * to swap with a page in the ring buffer.
  *
  * for example:
- *  rpage = ring_buffer_alloc_page(buffer);
+ *  rpage = ring_buffer_alloc_read_page(buffer);
  *  if (!rpage)
  *	return error;
- *  ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
- *  if (ret)
- *	process_page(rpage);
+ *  ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
+ *  if (ret >= 0)
+ *	process_page(rpage, ret);
  *
  * When @full is set, the function will not return true unless
  * the writer is off the reader page.
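Expanding the kernel-doc example above into a full consumer, a drain loop under the new calling convention might look like the following. Here buffer, cpu, and process_page() are assumed to exist; the key point is that the return value is now the offset of the first unconsumed event in the page, not a boolean:

	void *rpage = ring_buffer_alloc_read_page(buffer);
	int ret;

	if (!rpage)
		return -ENOMEM;

	for (;;) {
		/* full == 0: accept partially filled pages */
		ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
		if (ret < 0)
			break;			/* no data was transferred */
		process_page(rpage, ret);	/* events start at offset ret */
	}

	ring_buffer_free_read_page(buffer, rpage);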
@@ -2393,72 +2673,118 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
  * responsible for that.
  *
  * Returns:
- *  1 if data has been transferred
- *  0 if no data has been transferred.
+ *  >=0 if data has been transferred, returns the offset of consumed data.
+ *  <0 if no data has been transferred.
  */
 int ring_buffer_read_page(struct ring_buffer *buffer,
-			  void **data_page, int cpu, int full)
+			  void **data_page, size_t len, int cpu, int full)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
+	struct buffer_page *reader;
 	unsigned long flags;
-	int ret = 0;
+	unsigned int commit;
+	unsigned int read;
+	u64 save_timestamp;
+	int ret = -1;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		goto out;
+
+	/*
+	 * If len is not big enough to hold the page header, then
+	 * we can not copy anything.
+	 */
+	if (len <= BUF_PAGE_HDR_SIZE)
+		goto out;
+
+	len -= BUF_PAGE_HDR_SIZE;
 
 	if (!data_page)
-		return 0;
+		goto out;
 
 	bpage = *data_page;
 	if (!bpage)
-		return 0;
+		goto out;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
-	/*
-	 * rb_buffer_peek will get the next ring buffer if
-	 * the current reader page is empty.
-	 */
-	event = rb_buffer_peek(buffer, cpu, NULL);
-	if (!event)
-		goto out;
+	reader = rb_get_reader_page(cpu_buffer);
+	if (!reader)
+		goto out_unlock;
+
+	event = rb_reader_event(cpu_buffer);
+
+	read = reader->read;
+	commit = rb_page_commit(reader);
 
-	/* check for data */
-	if (!local_read(&cpu_buffer->reader_page->page->commit))
-		goto out;
 	/*
-	 * If the writer is already off of the read page, then simply
-	 * switch the read page with the given page. Otherwise
-	 * we need to copy the data from the reader to the writer.
+	 * If this page has been partially read or
+	 * if len is not big enough to read the rest of the page or
+	 * a writer is still on the page, then
+	 * we must copy the data from the page to the buffer.
+	 * Otherwise, we can simply swap the page with the one passed in.
 	 */
-	if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
-		unsigned int read = cpu_buffer->reader_page->read;
+	if (read || (len < (commit - read)) ||
+	    cpu_buffer->reader_page == cpu_buffer->commit_page) {
+		struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
+		unsigned int rpos = read;
+		unsigned int pos = 0;
+		unsigned int size;
 
 		if (full)
-			goto out;
-		/* The writer is still on the reader page, we must copy */
-		bpage = cpu_buffer->reader_page->page;
-		memcpy(bpage->data,
-		       cpu_buffer->reader_page->page->data + read,
-		       local_read(&bpage->commit) - read);
+			goto out_unlock;
+
+		if (len > (commit - read))
+			len = (commit - read);
+
+		size = rb_event_length(event);
+
+		if (len < size)
+			goto out_unlock;
 
-		/* consume what was read */
-		cpu_buffer->reader_page += read;
+		/* save the current timestamp, since the user will need it */
+		save_timestamp = cpu_buffer->read_stamp;
 
+		/* Need to copy one event at a time */
+		do {
+			memcpy(bpage->data + pos, rpage->data + rpos, size);
+
+			len -= size;
+
+			rb_advance_reader(cpu_buffer);
+			rpos = reader->read;
+			pos += size;
+
+			event = rb_reader_event(cpu_buffer);
+			size = rb_event_length(event);
+		} while (len > size);
+
+		/* update bpage */
+		local_set(&bpage->commit, pos);
+		bpage->time_stamp = save_timestamp;
+
+		/* we copied everything to the beginning */
+		read = 0;
 	} else {
 		/* swap the pages */
 		rb_init_page(bpage);
-		bpage = cpu_buffer->reader_page->page;
-		cpu_buffer->reader_page->page = *data_page;
-		cpu_buffer->reader_page->read = 0;
+		bpage = reader->page;
+		reader->page = *data_page;
+		local_set(&reader->write, 0);
+		reader->read = 0;
 		*data_page = bpage;
+
+		/* update the entry counter */
+		rb_remove_entries(cpu_buffer, bpage, read);
 	}
-	ret = 1;
+	ret = read;
 
-	/* update the entry counter */
-	rb_remove_entries(cpu_buffer, bpage);
- out:
+ out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
+ out:
 	return ret;
 }
 
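The condition choosing between copying and swapping packs three independent reasons into one if statement. Spelled out as a standalone, runnable predicate (types simplified, names mirroring the kernel code):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	/*
	 * Mock of the decision in ring_buffer_read_page(): copy when the
	 * page was partially read, when the caller's buffer is too small
	 * for the unread region, or when the writer is still on the
	 * reader page; otherwise the whole page can be swapped out.
	 */
	static bool must_copy(unsigned int read, unsigned int commit,
			      size_t len, bool writer_on_reader_page)
	{
		return read || (len < (commit - read)) || writer_on_reader_page;
	}

	int main(void)
	{
		/* Untouched page, buffer big enough, writer elsewhere: swap. */
		printf("%d\n", must_copy(0, 4096, 4096, false));	/* 0 */
		/* A partially read page must be copied. */
		printf("%d\n", must_copy(128, 4096, 4096, false));	/* 1 */
		return 0;
	}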
@@ -2466,7 +2792,7 @@ static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
 	       size_t cnt, loff_t *ppos)
 {
-	long *p = filp->private_data;
+	unsigned long *p = filp->private_data;
 	char buf[64];
 	int r;
 
@@ -2482,9 +2808,9 @@
 rb_simple_write(struct file *filp, const char __user *ubuf,
 		size_t cnt, loff_t *ppos)
 {
-	long *p = filp->private_data;
+	unsigned long *p = filp->private_data;
 	char buf[64];
-	long val;
+	unsigned long val;
 	int ret;
 
 	if (cnt >= sizeof(buf))
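The long to unsigned long change in both functions is most plausibly about type agreement with the parser used further down in rb_simple_write(), which in this era of the file writes through an unsigned long pointer; the unchanged middle of the function reads roughly like this sketch:

	ret = strict_strtoul(buf, 10, &val);	/* requires unsigned long val */
	if (ret < 0)
		return ret;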
@@ -2509,7 +2835,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
-static struct file_operations rb_simple_fops = {
+static const struct file_operations rb_simple_fops = {
 	.open = tracing_open_generic,
 	.read = rb_simple_read,
 	.write = rb_simple_write,
@@ -2532,3 +2858,42 @@ static __init int rb_init_debugfs(void)
 }
 
 fs_initcall(rb_init_debugfs);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int rb_cpu_notify(struct notifier_block *self,
+			 unsigned long action, void *hcpu)
+{
+	struct ring_buffer *buffer =
+		container_of(self, struct ring_buffer, cpu_notify);
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (cpu_isset(cpu, *buffer->cpumask))
+			return NOTIFY_OK;
+
+		buffer->buffers[cpu] =
+			rb_allocate_cpu_buffer(buffer, cpu);
+		if (!buffer->buffers[cpu]) {
+			WARN(1, "failed to allocate ring buffer on CPU %ld\n",
+			     cpu);
+			return NOTIFY_OK;
+		}
+		smp_wmb();
+		cpu_set(cpu, *buffer->cpumask);
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		/*
+		 * Do nothing.
+		 *  If we were to free the buffer, then the user would
+		 *  lose any trace that was in the buffer.
+		 */
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif
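The notifier above only fires if it is registered when the buffer is created. Elsewhere in this patch, ring_buffer_alloc() hooks it up roughly as below; this is reconstructed from the cpu_notify field named in the container_of() above, and the priority value is an assumption:

	#ifdef CONFIG_HOTPLUG_CPU
		/* let the buffer grow a per-CPU list as CPUs come online */
		buffer->cpu_notify.notifier_call = rb_cpu_notify;
		buffer->cpu_notify.priority = 0;
		register_cpu_notifier(&buffer->cpu_notify);
	#endif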
