Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 281
1 file changed, 189 insertions(+), 92 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8ad..178858492a89 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,13 +4,15 @@ | |||
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/ring_buffer.h> | 6 | #include <linux/ring_buffer.h> |
7 | #include <linux/trace_clock.h> | ||
8 | #include <linux/ftrace_irq.h> | ||
7 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
8 | #include <linux/debugfs.h> | 10 | #include <linux/debugfs.h> |
9 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
12 | #include <linux/hardirq.h> | ||
10 | #include <linux/module.h> | 13 | #include <linux/module.h> |
11 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
12 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
13 | #include <linux/sched.h> /* used for sched_clock() (for now) */ | ||
14 | #include <linux/init.h> | 16 | #include <linux/init.h> |
15 | #include <linux/hash.h> | 17 | #include <linux/hash.h> |
16 | #include <linux/list.h> | 18 | #include <linux/list.h> |
@@ -57,7 +59,9 @@ enum { | |||
57 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, | 59 | RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, |
58 | }; | 60 | }; |
59 | 61 | ||
60 | static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; | 62 | static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; |
63 | |||
64 | #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) | ||
61 | 65 | ||
62 | /** | 66 | /** |
63 | * tracing_on - enable all tracing buffers | 67 | * tracing_on - enable all tracing buffers |
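Two small but load-bearing changes in this hunk: ring_buffer_flags becomes unsigned long, matching the word type that set_bit()/clear_bit() in tracing_off_permanent() operate on, and BUF_PAGE_HDR_SIZE is introduced so the page header size is computed in one place with offsetof(). A minimal sketch of the offsetof() idea, using a stand-in struct with simplified field types (the real struct buffer_data_page appears later in this diff):

	#include <stddef.h>

	/* Stand-in for struct buffer_data_page: fixed header, then raw data. */
	struct data_page {
		unsigned long long time_stamp;	/* page time stamp */
		long commit;			/* bytes committed on this page */
		unsigned char data[];		/* payload starts here */
	};

	/* Header size is the offset of the flexible data[] member ... */
	#define PAGE_HDR_SIZE	offsetof(struct data_page, data)
	/* ... and the usable payload is whatever is left of the page. */
	#define DATA_SIZE	(4096 - PAGE_HDR_SIZE)	/* assumes a 4K page */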
@@ -89,26 +93,34 @@ EXPORT_SYMBOL_GPL(tracing_off); | |||
89 | * tracing_off_permanent - permanently disable ring buffers | 93 | * tracing_off_permanent - permanently disable ring buffers |
90 | * | 94 | * |
91 | * This function, once called, will disable all ring buffers | 95 | * This function, once called, will disable all ring buffers |
92 | * permanenty. | 96 | * permanently. |
93 | */ | 97 | */ |
94 | void tracing_off_permanent(void) | 98 | void tracing_off_permanent(void) |
95 | { | 99 | { |
96 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); | 100 | set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); |
97 | } | 101 | } |
98 | 102 | ||
103 | /** | ||
104 | * tracing_is_on - show state of ring buffers enabled | ||
105 | */ | ||
106 | int tracing_is_on(void) | ||
107 | { | ||
108 | return ring_buffer_flags == RB_BUFFERS_ON; | ||
109 | } | ||
110 | EXPORT_SYMBOL_GPL(tracing_is_on); | ||
111 | |||
99 | #include "trace.h" | 112 | #include "trace.h" |
100 | 113 | ||
101 | /* Up this if you want to test the TIME_EXTENTS and normalization */ | 114 | /* Up this if you want to test the TIME_EXTENTS and normalization */ |
102 | #define DEBUG_SHIFT 0 | 115 | #define DEBUG_SHIFT 0 |
103 | 116 | ||
104 | /* FIXME!!! */ | ||
105 | u64 ring_buffer_time_stamp(int cpu) | 117 | u64 ring_buffer_time_stamp(int cpu) |
106 | { | 118 | { |
107 | u64 time; | 119 | u64 time; |
108 | 120 | ||
109 | preempt_disable_notrace(); | 121 | preempt_disable_notrace(); |
110 | /* shift to debug/test normalization and TIME_EXTENTS */ | 122 | /* shift to debug/test normalization and TIME_EXTENTS */ |
111 | time = sched_clock() << DEBUG_SHIFT; | 123 | time = trace_clock_local() << DEBUG_SHIFT; |
112 | preempt_enable_no_resched_notrace(); | 124 | preempt_enable_no_resched_notrace(); |
113 | 125 | ||
114 | return time; | 126 | return time; |
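Two related additions here: tracing_is_on() lets callers query the global state that tracing_on()/tracing_off() toggle, and the timestamp source moves from sched_clock() to trace_clock_local(), which is why linux/trace_clock.h is now included and the linux/sched.h include, kept only for sched_clock(), is dropped. A hedged sketch of how the three state helpers might be paired; my_dump_buffers() is a made-up function used only for illustration:

	/* Sketch: silence the ring buffers around an inspection pass,
	 * then restore whatever state was there before. */
	static void my_dump_buffers(void)
	{
		int was_on = tracing_is_on();

		tracing_off();			/* stop all writers */
		/* ... walk or dump the buffers here ... */
		if (was_on)
			tracing_on();		/* re-enable only if it was enabled */
	}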
@@ -122,9 +134,8 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) | |||
122 | } | 134 | } |
123 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); | 135 | EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); |
124 | 136 | ||
125 | #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) | 137 | #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) |
126 | #define RB_ALIGNMENT_SHIFT 2 | 138 | #define RB_ALIGNMENT 4U |
127 | #define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) | ||
128 | #define RB_MAX_SMALL_DATA 28 | 139 | #define RB_MAX_SMALL_DATA 28 |
129 | 140 | ||
130 | enum { | 141 | enum { |
@@ -133,7 +144,7 @@ enum { | |||
133 | }; | 144 | }; |
134 | 145 | ||
135 | /* inline for ring buffer fast paths */ | 146 | /* inline for ring buffer fast paths */ |
136 | static inline unsigned | 147 | static unsigned |
137 | rb_event_length(struct ring_buffer_event *event) | 148 | rb_event_length(struct ring_buffer_event *event) |
138 | { | 149 | { |
139 | unsigned length; | 150 | unsigned length; |
@@ -151,7 +162,7 @@ rb_event_length(struct ring_buffer_event *event) | |||
151 | 162 | ||
152 | case RINGBUF_TYPE_DATA: | 163 | case RINGBUF_TYPE_DATA: |
153 | if (event->len) | 164 | if (event->len) |
154 | length = event->len << RB_ALIGNMENT_SHIFT; | 165 | length = event->len * RB_ALIGNMENT; |
155 | else | 166 | else |
156 | length = event->array[0]; | 167 | length = event->array[0]; |
157 | return length + RB_EVNT_HDR_SIZE; | 168 | return length + RB_EVNT_HDR_SIZE; |
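RB_ALIGNMENT_SHIFT is dropped in favor of a plain RB_ALIGNMENT of 4, and the event header size is now derived with offsetof() rather than sizeof(), so rb_event_length() recovers the payload size with a simple multiply. A worked example, assuming the event header comes out to 4 bytes on this layout:

	/* A 13-byte DATA payload is stored in 4-byte units: */
	unsigned units = DIV_ROUND_UP(13, RB_ALIGNMENT);		/* = 4 units        */
	unsigned bytes = units * RB_ALIGNMENT + RB_EVNT_HDR_SIZE;	/* = 16 + 4 = 20    */

	/* Payloads larger than RB_MAX_SMALL_DATA (28 bytes) instead keep
	 * event->len == 0 and put the exact byte count in event->array[0]. */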
@@ -179,7 +190,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) | |||
179 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 190 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
180 | 191 | ||
181 | /* inline for ring buffer fast paths */ | 192 | /* inline for ring buffer fast paths */ |
182 | static inline void * | 193 | static void * |
183 | rb_event_data(struct ring_buffer_event *event) | 194 | rb_event_data(struct ring_buffer_event *event) |
184 | { | 195 | { |
185 | BUG_ON(event->type != RINGBUF_TYPE_DATA); | 196 | BUG_ON(event->type != RINGBUF_TYPE_DATA); |
@@ -209,7 +220,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
209 | 220 | ||
210 | struct buffer_data_page { | 221 | struct buffer_data_page { |
211 | u64 time_stamp; /* page time stamp */ | 222 | u64 time_stamp; /* page time stamp */ |
212 | local_t commit; /* write commited index */ | 223 | local_t commit; /* write committed index */ |
213 | unsigned char data[]; /* data of buffer page */ | 224 | unsigned char data[]; /* data of buffer page */ |
214 | }; | 225 | }; |
215 | 226 | ||
@@ -225,14 +236,25 @@ static void rb_init_page(struct buffer_data_page *bpage) | |||
225 | local_set(&bpage->commit, 0); | 236 | local_set(&bpage->commit, 0); |
226 | } | 237 | } |
227 | 238 | ||
239 | /** | ||
240 | * ring_buffer_page_len - the size of data on the page. | ||
241 | * @page: The page to read | ||
242 | * | ||
243 | * Returns the amount of data on the page, including buffer page header. | ||
244 | */ | ||
245 | size_t ring_buffer_page_len(void *page) | ||
246 | { | ||
247 | return local_read(&((struct buffer_data_page *)page)->commit) | ||
248 | + BUF_PAGE_HDR_SIZE; | ||
249 | } | ||
250 | |||
228 | /* | 251 | /* |
229 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing | 252 | * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing |
230 | * this issue out. | 253 | * this issue out. |
231 | */ | 254 | */ |
232 | static inline void free_buffer_page(struct buffer_page *bpage) | 255 | static void free_buffer_page(struct buffer_page *bpage) |
233 | { | 256 | { |
234 | if (bpage->page) | 257 | free_page((unsigned long)bpage->page); |
235 | free_page((unsigned long)bpage->page); | ||
236 | kfree(bpage); | 258 | kfree(bpage); |
237 | } | 259 | } |
238 | 260 | ||
@@ -246,7 +268,7 @@ static inline int test_time_stamp(u64 delta) | |||
246 | return 0; | 268 | return 0; |
247 | } | 269 | } |
248 | 270 | ||
249 | #define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) | 271 | #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) |
250 | 272 | ||
251 | /* | 273 | /* |
252 | * head_page == tail_page && head == tail then buffer is empty. | 274 | * head_page == tail_page && head == tail then buffer is empty. |
@@ -260,7 +282,7 @@ struct ring_buffer_per_cpu { | |||
260 | struct list_head pages; | 282 | struct list_head pages; |
261 | struct buffer_page *head_page; /* read from head */ | 283 | struct buffer_page *head_page; /* read from head */ |
262 | struct buffer_page *tail_page; /* write to tail */ | 284 | struct buffer_page *tail_page; /* write to tail */ |
263 | struct buffer_page *commit_page; /* commited pages */ | 285 | struct buffer_page *commit_page; /* committed pages */ |
264 | struct buffer_page *reader_page; | 286 | struct buffer_page *reader_page; |
265 | unsigned long overrun; | 287 | unsigned long overrun; |
266 | unsigned long entries; | 288 | unsigned long entries; |
@@ -273,8 +295,8 @@ struct ring_buffer { | |||
273 | unsigned pages; | 295 | unsigned pages; |
274 | unsigned flags; | 296 | unsigned flags; |
275 | int cpus; | 297 | int cpus; |
276 | cpumask_var_t cpumask; | ||
277 | atomic_t record_disabled; | 298 | atomic_t record_disabled; |
299 | cpumask_var_t cpumask; | ||
278 | 300 | ||
279 | struct mutex mutex; | 301 | struct mutex mutex; |
280 | 302 | ||
@@ -303,7 +325,7 @@ struct ring_buffer_iter { | |||
303 | * check_pages - integrity check of buffer pages | 325 | * check_pages - integrity check of buffer pages |
304 | * @cpu_buffer: CPU buffer with pages to test | 326 | * @cpu_buffer: CPU buffer with pages to test |
305 | * | 327 | * |
306 | * As a safty measure we check to make sure the data pages have not | 328 | * As a safety measure we check to make sure the data pages have not |
307 | * been corrupted. | 329 | * been corrupted. |
308 | */ | 330 | */ |
309 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | 331 | static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) |
@@ -811,7 +833,7 @@ rb_event_index(struct ring_buffer_event *event) | |||
811 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); | 833 | return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); |
812 | } | 834 | } |
813 | 835 | ||
814 | static inline int | 836 | static int |
815 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | 837 | rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, |
816 | struct ring_buffer_event *event) | 838 | struct ring_buffer_event *event) |
817 | { | 839 | { |
@@ -825,7 +847,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
825 | rb_commit_index(cpu_buffer) == index; | 847 | rb_commit_index(cpu_buffer) == index; |
826 | } | 848 | } |
827 | 849 | ||
828 | static inline void | 850 | static void |
829 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | 851 | rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, |
830 | struct ring_buffer_event *event) | 852 | struct ring_buffer_event *event) |
831 | { | 853 | { |
@@ -850,7 +872,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
850 | local_set(&cpu_buffer->commit_page->page->commit, index); | 872 | local_set(&cpu_buffer->commit_page->page->commit, index); |
851 | } | 873 | } |
852 | 874 | ||
853 | static inline void | 875 | static void |
854 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) | 876 | rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) |
855 | { | 877 | { |
856 | /* | 878 | /* |
@@ -896,7 +918,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
896 | cpu_buffer->reader_page->read = 0; | 918 | cpu_buffer->reader_page->read = 0; |
897 | } | 919 | } |
898 | 920 | ||
899 | static inline void rb_inc_iter(struct ring_buffer_iter *iter) | 921 | static void rb_inc_iter(struct ring_buffer_iter *iter) |
900 | { | 922 | { |
901 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 923 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
902 | 924 | ||
@@ -926,7 +948,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter) | |||
926 | * and with this, we can determine what to place into the | 948 | * and with this, we can determine what to place into the |
927 | * data field. | 949 | * data field. |
928 | */ | 950 | */ |
929 | static inline void | 951 | static void |
930 | rb_update_event(struct ring_buffer_event *event, | 952 | rb_update_event(struct ring_buffer_event *event, |
931 | unsigned type, unsigned length) | 953 | unsigned type, unsigned length) |
932 | { | 954 | { |
@@ -938,15 +960,11 @@ rb_update_event(struct ring_buffer_event *event, | |||
938 | break; | 960 | break; |
939 | 961 | ||
940 | case RINGBUF_TYPE_TIME_EXTEND: | 962 | case RINGBUF_TYPE_TIME_EXTEND: |
941 | event->len = | 963 | event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); |
942 | (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) | ||
943 | >> RB_ALIGNMENT_SHIFT; | ||
944 | break; | 964 | break; |
945 | 965 | ||
946 | case RINGBUF_TYPE_TIME_STAMP: | 966 | case RINGBUF_TYPE_TIME_STAMP: |
947 | event->len = | 967 | event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); |
948 | (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) | ||
949 | >> RB_ALIGNMENT_SHIFT; | ||
950 | break; | 968 | break; |
951 | 969 | ||
952 | case RINGBUF_TYPE_DATA: | 970 | case RINGBUF_TYPE_DATA: |
@@ -955,16 +973,14 @@ rb_update_event(struct ring_buffer_event *event, | |||
955 | event->len = 0; | 973 | event->len = 0; |
956 | event->array[0] = length; | 974 | event->array[0] = length; |
957 | } else | 975 | } else |
958 | event->len = | 976 | event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); |
959 | (length + (RB_ALIGNMENT-1)) | ||
960 | >> RB_ALIGNMENT_SHIFT; | ||
961 | break; | 977 | break; |
962 | default: | 978 | default: |
963 | BUG(); | 979 | BUG(); |
964 | } | 980 | } |
965 | } | 981 | } |
966 | 982 | ||
967 | static inline unsigned rb_calculate_event_length(unsigned length) | 983 | static unsigned rb_calculate_event_length(unsigned length) |
968 | { | 984 | { |
969 | struct ring_buffer_event event; /* Used only for sizeof array */ | 985 | struct ring_buffer_event event; /* Used only for sizeof array */ |
970 | 986 | ||
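The three open-coded round-up-and-shift computations in rb_update_event() collapse into DIV_ROUND_UP(), which is equivalent for the power-of-two alignment used here and much easier to read. A small illustration of the equivalence:

	/* With RB_ALIGNMENT == 4 (so the old shift was by 2), the old and
	 * new forms agree for every length: */
	static inline unsigned old_units(unsigned len) { return (len + 3) >> 2; }
	static inline unsigned new_units(unsigned len) { return DIV_ROUND_UP(len, 4); }
	/* e.g. old_units(9) == new_units(9) == 3, old_units(32) == new_units(32) == 8 */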
@@ -990,6 +1006,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
990 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1006 | struct ring_buffer *buffer = cpu_buffer->buffer; |
991 | struct ring_buffer_event *event; | 1007 | struct ring_buffer_event *event; |
992 | unsigned long flags; | 1008 | unsigned long flags; |
1009 | bool lock_taken = false; | ||
993 | 1010 | ||
994 | commit_page = cpu_buffer->commit_page; | 1011 | commit_page = cpu_buffer->commit_page; |
995 | /* we just need to protect against interrupts */ | 1012 | /* we just need to protect against interrupts */ |
@@ -1003,7 +1020,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1003 | struct buffer_page *next_page = tail_page; | 1020 | struct buffer_page *next_page = tail_page; |
1004 | 1021 | ||
1005 | local_irq_save(flags); | 1022 | local_irq_save(flags); |
1006 | __raw_spin_lock(&cpu_buffer->lock); | 1023 | /* |
1024 | * Since the write to the buffer is still not | ||
1025 | * fully lockless, we must be careful with NMIs. | ||
1026 | * The locks in the writers are taken when a write | ||
1027 | * crosses to a new page. The locks protect against | ||
1028 | * races with the readers (this will soon be fixed | ||
1029 | * with a lockless solution). | ||
1030 | * | ||
1031 | * Because we can not protect against NMIs, and we | ||
1032 | * want to keep traces reentrant, we need to manage | ||
1033 | * what happens when we are in an NMI. | ||
1034 | * | ||
1035 | * NMIs can happen after we take the lock. | ||
1036 | * If we are in an NMI, only take the lock | ||
1037 | * if it is not already taken. Otherwise | ||
1038 | * simply fail. | ||
1039 | */ | ||
1040 | if (unlikely(in_nmi())) { | ||
1041 | if (!__raw_spin_trylock(&cpu_buffer->lock)) | ||
1042 | goto out_reset; | ||
1043 | } else | ||
1044 | __raw_spin_lock(&cpu_buffer->lock); | ||
1045 | |||
1046 | lock_taken = true; | ||
1007 | 1047 | ||
1008 | rb_inc_page(cpu_buffer, &next_page); | 1048 | rb_inc_page(cpu_buffer, &next_page); |
1009 | 1049 | ||
@@ -1012,7 +1052,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1012 | 1052 | ||
1013 | /* we grabbed the lock before incrementing */ | 1053 | /* we grabbed the lock before incrementing */ |
1014 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) | 1054 | if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) |
1015 | goto out_unlock; | 1055 | goto out_reset; |
1016 | 1056 | ||
1017 | /* | 1057 | /* |
1018 | * If for some reason, we had an interrupt storm that made | 1058 | * If for some reason, we had an interrupt storm that made |
@@ -1021,12 +1061,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1021 | */ | 1061 | */ |
1022 | if (unlikely(next_page == commit_page)) { | 1062 | if (unlikely(next_page == commit_page)) { |
1023 | WARN_ON_ONCE(1); | 1063 | WARN_ON_ONCE(1); |
1024 | goto out_unlock; | 1064 | goto out_reset; |
1025 | } | 1065 | } |
1026 | 1066 | ||
1027 | if (next_page == head_page) { | 1067 | if (next_page == head_page) { |
1028 | if (!(buffer->flags & RB_FL_OVERWRITE)) | 1068 | if (!(buffer->flags & RB_FL_OVERWRITE)) |
1029 | goto out_unlock; | 1069 | goto out_reset; |
1030 | 1070 | ||
1031 | /* tail_page has not moved yet? */ | 1071 | /* tail_page has not moved yet? */ |
1032 | if (tail_page == cpu_buffer->tail_page) { | 1072 | if (tail_page == cpu_buffer->tail_page) { |
@@ -1100,12 +1140,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1100 | 1140 | ||
1101 | return event; | 1141 | return event; |
1102 | 1142 | ||
1103 | out_unlock: | 1143 | out_reset: |
1104 | /* reset write */ | 1144 | /* reset write */ |
1105 | if (tail <= BUF_PAGE_SIZE) | 1145 | if (tail <= BUF_PAGE_SIZE) |
1106 | local_set(&tail_page->write, tail); | 1146 | local_set(&tail_page->write, tail); |
1107 | 1147 | ||
1108 | __raw_spin_unlock(&cpu_buffer->lock); | 1148 | if (likely(lock_taken)) |
1149 | __raw_spin_unlock(&cpu_buffer->lock); | ||
1109 | local_irq_restore(flags); | 1150 | local_irq_restore(flags); |
1110 | return NULL; | 1151 | return NULL; |
1111 | } | 1152 | } |
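The slow path of __rb_reserve_next() takes cpu_buffer->lock when a write crosses onto a new page. An NMI that fires while that lock is held and then tries to trace would spin on it forever, so in NMI context the writer only trylocks and gives up on the event if the lock is busy; the new lock_taken flag lets the renamed out_reset error path unlock only when the lock was really acquired (hence the new linux/hardirq.h and linux/ftrace_irq.h includes for in_nmi()). Condensed from the hunks above, the pattern looks like this:

	local_irq_save(flags);
	if (unlikely(in_nmi())) {
		/* Never spin in NMI context: the interrupted code may
		 * already hold this lock. */
		if (!__raw_spin_trylock(&cpu_buffer->lock))
			goto out_reset;
	} else
		__raw_spin_lock(&cpu_buffer->lock);
	lock_taken = true;

	/* ... page-crossing work; failures jump to out_reset ... */

	__raw_spin_unlock(&cpu_buffer->lock);
	local_irq_restore(flags);
	return event;

 out_reset:
	/* back out the partial write, then unlock only if we locked */
	if (likely(lock_taken))
		__raw_spin_unlock(&cpu_buffer->lock);
	local_irq_restore(flags);
	return NULL;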
@@ -1265,7 +1306,6 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
1265 | * ring_buffer_lock_reserve - reserve a part of the buffer | 1306 | * ring_buffer_lock_reserve - reserve a part of the buffer |
1266 | * @buffer: the ring buffer to reserve from | 1307 | * @buffer: the ring buffer to reserve from |
1267 | * @length: the length of the data to reserve (excluding event header) | 1308 | * @length: the length of the data to reserve (excluding event header) |
1268 | * @flags: a pointer to save the interrupt flags | ||
1269 | * | 1309 | * |
1270 | * Returns a reserved event on the ring buffer to copy directly to. | 1310 | * Returns a reserved event on the ring buffer to copy directly to. |
1271 | * The user of this interface will need to get the body to write into | 1311 | * The user of this interface will need to get the body to write into |
@@ -1278,9 +1318,7 @@ static DEFINE_PER_CPU(int, rb_need_resched); | |||
1278 | * If NULL is returned, then nothing has been allocated or locked. | 1318 | * If NULL is returned, then nothing has been allocated or locked. |
1279 | */ | 1319 | */ |
1280 | struct ring_buffer_event * | 1320 | struct ring_buffer_event * |
1281 | ring_buffer_lock_reserve(struct ring_buffer *buffer, | 1321 | ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) |
1282 | unsigned long length, | ||
1283 | unsigned long *flags) | ||
1284 | { | 1322 | { |
1285 | struct ring_buffer_per_cpu *cpu_buffer; | 1323 | struct ring_buffer_per_cpu *cpu_buffer; |
1286 | struct ring_buffer_event *event; | 1324 | struct ring_buffer_event *event; |
@@ -1347,15 +1385,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | |||
1347 | * ring_buffer_unlock_commit - commit a reserved | 1385 | * ring_buffer_unlock_commit - commit a reserved |
1348 | * @buffer: The buffer to commit to | 1386 | * @buffer: The buffer to commit to |
1349 | * @event: The event pointer to commit. | 1387 | * @event: The event pointer to commit. |
1350 | * @flags: the interrupt flags received from ring_buffer_lock_reserve. | ||
1351 | * | 1388 | * |
1352 | * This commits the data to the ring buffer, and releases any locks held. | 1389 | * This commits the data to the ring buffer, and releases any locks held. |
1353 | * | 1390 | * |
1354 | * Must be paired with ring_buffer_lock_reserve. | 1391 | * Must be paired with ring_buffer_lock_reserve. |
1355 | */ | 1392 | */ |
1356 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, | 1393 | int ring_buffer_unlock_commit(struct ring_buffer *buffer, |
1357 | struct ring_buffer_event *event, | 1394 | struct ring_buffer_event *event) |
1358 | unsigned long flags) | ||
1359 | { | 1395 | { |
1360 | struct ring_buffer_per_cpu *cpu_buffer; | 1396 | struct ring_buffer_per_cpu *cpu_buffer; |
1361 | int cpu = raw_smp_processor_id(); | 1397 | int cpu = raw_smp_processor_id(); |
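ring_buffer_lock_reserve() and ring_buffer_unlock_commit() no longer take an interrupt-flags argument, so the corresponding @flags kerneldoc lines go away as well; the buffer manages its own IRQ and preemption state internally. A hedged sketch of the updated reserve/commit sequence; struct my_entry and its value field are made up for illustration:

	struct ring_buffer_event *event;
	struct my_entry *entry;			/* hypothetical payload layout */

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return;				/* buffer disabled or no room */

	entry = ring_buffer_event_data(event);
	entry->value = 42;			/* fill in the reserved slot */

	ring_buffer_unlock_commit(buffer, event);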
@@ -1438,7 +1474,7 @@ int ring_buffer_write(struct ring_buffer *buffer, | |||
1438 | } | 1474 | } |
1439 | EXPORT_SYMBOL_GPL(ring_buffer_write); | 1475 | EXPORT_SYMBOL_GPL(ring_buffer_write); |
1440 | 1476 | ||
1441 | static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) | 1477 | static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) |
1442 | { | 1478 | { |
1443 | struct buffer_page *reader = cpu_buffer->reader_page; | 1479 | struct buffer_page *reader = cpu_buffer->reader_page; |
1444 | struct buffer_page *head = cpu_buffer->head_page; | 1480 | struct buffer_page *head = cpu_buffer->head_page; |
@@ -2277,9 +2313,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2277 | if (buffer_a->pages != buffer_b->pages) | 2313 | if (buffer_a->pages != buffer_b->pages) |
2278 | return -EINVAL; | 2314 | return -EINVAL; |
2279 | 2315 | ||
2316 | if (ring_buffer_flags != RB_BUFFERS_ON) | ||
2317 | return -EAGAIN; | ||
2318 | |||
2319 | if (atomic_read(&buffer_a->record_disabled)) | ||
2320 | return -EAGAIN; | ||
2321 | |||
2322 | if (atomic_read(&buffer_b->record_disabled)) | ||
2323 | return -EAGAIN; | ||
2324 | |||
2280 | cpu_buffer_a = buffer_a->buffers[cpu]; | 2325 | cpu_buffer_a = buffer_a->buffers[cpu]; |
2281 | cpu_buffer_b = buffer_b->buffers[cpu]; | 2326 | cpu_buffer_b = buffer_b->buffers[cpu]; |
2282 | 2327 | ||
2328 | if (atomic_read(&cpu_buffer_a->record_disabled)) | ||
2329 | return -EAGAIN; | ||
2330 | |||
2331 | if (atomic_read(&cpu_buffer_b->record_disabled)) | ||
2332 | return -EAGAIN; | ||
2333 | |||
2283 | /* | 2334 | /* |
2284 | * We can't do a synchronize_sched here because this | 2335 | * We can't do a synchronize_sched here because this |
2285 | * function can be called in atomic context. | 2336 | * function can be called in atomic context. |
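ring_buffer_swap_cpu() now refuses to swap while recording is disabled, whether globally, on either buffer, or on either per-cpu buffer, and returns -EAGAIN so the caller can retry later. A hedged sketch of a caller; max_buffer and trace_buffer are placeholder names:

	int ret;

	ret = ring_buffer_swap_cpu(max_buffer, trace_buffer, cpu);
	if (ret == -EAGAIN)
		return;		/* recording disabled somewhere, try again later */
	if (ret == -EINVAL)
		return;		/* cpu not present or buffer sizes differ */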
@@ -2303,13 +2354,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, | |||
2303 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | 2354 | EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); |
2304 | 2355 | ||
2305 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | 2356 | static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, |
2306 | struct buffer_data_page *bpage) | 2357 | struct buffer_data_page *bpage, |
2358 | unsigned int offset) | ||
2307 | { | 2359 | { |
2308 | struct ring_buffer_event *event; | 2360 | struct ring_buffer_event *event; |
2309 | unsigned long head; | 2361 | unsigned long head; |
2310 | 2362 | ||
2311 | __raw_spin_lock(&cpu_buffer->lock); | 2363 | __raw_spin_lock(&cpu_buffer->lock); |
2312 | for (head = 0; head < local_read(&bpage->commit); | 2364 | for (head = offset; head < local_read(&bpage->commit); |
2313 | head += rb_event_length(event)) { | 2365 | head += rb_event_length(event)) { |
2314 | 2366 | ||
2315 | event = __rb_data_page_index(bpage, head); | 2367 | event = __rb_data_page_index(bpage, head); |
@@ -2340,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, | |||
2340 | */ | 2392 | */ |
2341 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 2393 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) |
2342 | { | 2394 | { |
2343 | unsigned long addr; | ||
2344 | struct buffer_data_page *bpage; | 2395 | struct buffer_data_page *bpage; |
2396 | unsigned long addr; | ||
2345 | 2397 | ||
2346 | addr = __get_free_page(GFP_KERNEL); | 2398 | addr = __get_free_page(GFP_KERNEL); |
2347 | if (!addr) | 2399 | if (!addr) |
@@ -2349,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | |||
2349 | 2401 | ||
2350 | bpage = (void *)addr; | 2402 | bpage = (void *)addr; |
2351 | 2403 | ||
2404 | rb_init_page(bpage); | ||
2405 | |||
2352 | return bpage; | 2406 | return bpage; |
2353 | } | 2407 | } |
2354 | 2408 | ||
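ring_buffer_alloc_read_page() now initializes the page it hands back (rb_init_page() zeroes the commit counter), so callers no longer receive a page with stale header fields. A small hedged sketch:

	void *page = ring_buffer_alloc_read_page(buffer);

	if (!page)
		return -ENOMEM;
	/* A fresh page starts with commit == 0, so until data is read
	 * into it, ring_buffer_page_len(page) == BUF_PAGE_HDR_SIZE. */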
@@ -2368,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2368 | * ring_buffer_read_page - extract a page from the ring buffer | 2422 | * ring_buffer_read_page - extract a page from the ring buffer |
2369 | * @buffer: buffer to extract from | 2423 | * @buffer: buffer to extract from |
2370 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page | 2424 | * @data_page: the page to use allocated from ring_buffer_alloc_read_page |
2425 | * @len: amount to extract | ||
2371 | * @cpu: the cpu of the buffer to extract | 2426 | * @cpu: the cpu of the buffer to extract |
2372 | * @full: should the extraction only happen when the page is full. | 2427 | * @full: should the extraction only happen when the page is full. |
2373 | * | 2428 | * |
@@ -2377,12 +2432,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2377 | * to swap with a page in the ring buffer. | 2432 | * to swap with a page in the ring buffer. |
2378 | * | 2433 | * |
2379 | * for example: | 2434 | * for example: |
2380 | * rpage = ring_buffer_alloc_page(buffer); | 2435 | * rpage = ring_buffer_alloc_read_page(buffer); |
2381 | * if (!rpage) | 2436 | * if (!rpage) |
2382 | * return error; | 2437 | * return error; |
2383 | * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); | 2438 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); |
2384 | * if (ret) | 2439 | * if (ret >= 0) |
2385 | * process_page(rpage); | 2440 | * process_page(rpage, ret); |
2386 | * | 2441 | * |
2387 | * When @full is set, the function will not return true unless | 2442 | * When @full is set, the function will not return true unless |
2388 | * the writer is off the reader page. | 2443 | * the writer is off the reader page. |
@@ -2393,69 +2448,111 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) | |||
2393 | * responsible for that. | 2448 | * responsible for that. |
2394 | * | 2449 | * |
2395 | * Returns: | 2450 | * Returns: |
2396 | * 1 if data has been transferred | 2451 | * >=0 if data has been transferred, returns the offset of consumed data. |
2397 | * 0 if no data has been transferred. | 2452 | * <0 if no data has been transferred. |
2398 | */ | 2453 | */ |
2399 | int ring_buffer_read_page(struct ring_buffer *buffer, | 2454 | int ring_buffer_read_page(struct ring_buffer *buffer, |
2400 | void **data_page, int cpu, int full) | 2455 | void **data_page, size_t len, int cpu, int full) |
2401 | { | 2456 | { |
2402 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; | 2457 | struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; |
2403 | struct ring_buffer_event *event; | 2458 | struct ring_buffer_event *event; |
2404 | struct buffer_data_page *bpage; | 2459 | struct buffer_data_page *bpage; |
2460 | struct buffer_page *reader; | ||
2405 | unsigned long flags; | 2461 | unsigned long flags; |
2406 | int ret = 0; | 2462 | unsigned int commit; |
2463 | unsigned int read; | ||
2464 | u64 save_timestamp; | ||
2465 | int ret = -1; | ||
2466 | |||
2467 | /* | ||
2468 | * If len is not big enough to hold the page header, then | ||
2469 | * we can not copy anything. | ||
2470 | */ | ||
2471 | if (len <= BUF_PAGE_HDR_SIZE) | ||
2472 | return -1; | ||
2473 | |||
2474 | len -= BUF_PAGE_HDR_SIZE; | ||
2407 | 2475 | ||
2408 | if (!data_page) | 2476 | if (!data_page) |
2409 | return 0; | 2477 | return -1; |
2410 | 2478 | ||
2411 | bpage = *data_page; | 2479 | bpage = *data_page; |
2412 | if (!bpage) | 2480 | if (!bpage) |
2413 | return 0; | 2481 | return -1; |
2414 | 2482 | ||
2415 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2483 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2416 | 2484 | ||
2417 | /* | 2485 | reader = rb_get_reader_page(cpu_buffer); |
2418 | * rb_buffer_peek will get the next ring buffer if | 2486 | if (!reader) |
2419 | * the current reader page is empty. | ||
2420 | */ | ||
2421 | event = rb_buffer_peek(buffer, cpu, NULL); | ||
2422 | if (!event) | ||
2423 | goto out; | 2487 | goto out; |
2424 | 2488 | ||
2425 | /* check for data */ | 2489 | event = rb_reader_event(cpu_buffer); |
2426 | if (!local_read(&cpu_buffer->reader_page->page->commit)) | 2490 | |
2427 | goto out; | 2491 | read = reader->read; |
2492 | commit = rb_page_commit(reader); | ||
2493 | |||
2428 | /* | 2494 | /* |
2429 | * If the writer is already off of the read page, then simply | 2495 | * If this page has been partially read or |
2430 | * switch the read page with the given page. Otherwise | 2496 | * if len is not big enough to read the rest of the page or |
2431 | * we need to copy the data from the reader to the writer. | 2497 | * a writer is still on the page, then |
2498 | * we must copy the data from the page to the buffer. | ||
2499 | * Otherwise, we can simply swap the page with the one passed in. | ||
2432 | */ | 2500 | */ |
2433 | if (cpu_buffer->reader_page == cpu_buffer->commit_page) { | 2501 | if (read || (len < (commit - read)) || |
2434 | unsigned int read = cpu_buffer->reader_page->read; | 2502 | cpu_buffer->reader_page == cpu_buffer->commit_page) { |
2503 | struct buffer_data_page *rpage = cpu_buffer->reader_page->page; | ||
2504 | unsigned int rpos = read; | ||
2505 | unsigned int pos = 0; | ||
2506 | unsigned int size; | ||
2435 | 2507 | ||
2436 | if (full) | 2508 | if (full) |
2437 | goto out; | 2509 | goto out; |
2438 | /* The writer is still on the reader page, we must copy */ | ||
2439 | bpage = cpu_buffer->reader_page->page; | ||
2440 | memcpy(bpage->data, | ||
2441 | cpu_buffer->reader_page->page->data + read, | ||
2442 | local_read(&bpage->commit) - read); | ||
2443 | 2510 | ||
2444 | /* consume what was read */ | 2511 | if (len > (commit - read)) |
2445 | cpu_buffer->reader_page += read; | 2512 | len = (commit - read); |
2446 | 2513 | ||
2514 | size = rb_event_length(event); | ||
2515 | |||
2516 | if (len < size) | ||
2517 | goto out; | ||
2518 | |||
2519 | /* save the current timestamp, since the user will need it */ | ||
2520 | save_timestamp = cpu_buffer->read_stamp; | ||
2521 | |||
2522 | /* Need to copy one event at a time */ | ||
2523 | do { | ||
2524 | memcpy(bpage->data + pos, rpage->data + rpos, size); | ||
2525 | |||
2526 | len -= size; | ||
2527 | |||
2528 | rb_advance_reader(cpu_buffer); | ||
2529 | rpos = reader->read; | ||
2530 | pos += size; | ||
2531 | |||
2532 | event = rb_reader_event(cpu_buffer); | ||
2533 | size = rb_event_length(event); | ||
2534 | } while (len > size); | ||
2535 | |||
2536 | /* update bpage */ | ||
2537 | local_set(&bpage->commit, pos); | ||
2538 | bpage->time_stamp = save_timestamp; | ||
2539 | |||
2540 | /* we copied everything to the beginning */ | ||
2541 | read = 0; | ||
2447 | } else { | 2542 | } else { |
2448 | /* swap the pages */ | 2543 | /* swap the pages */ |
2449 | rb_init_page(bpage); | 2544 | rb_init_page(bpage); |
2450 | bpage = cpu_buffer->reader_page->page; | 2545 | bpage = reader->page; |
2451 | cpu_buffer->reader_page->page = *data_page; | 2546 | reader->page = *data_page; |
2452 | cpu_buffer->reader_page->read = 0; | 2547 | local_set(&reader->write, 0); |
2548 | reader->read = 0; | ||
2453 | *data_page = bpage; | 2549 | *data_page = bpage; |
2550 | |||
2551 | /* update the entry counter */ | ||
2552 | rb_remove_entries(cpu_buffer, bpage, read); | ||
2454 | } | 2553 | } |
2455 | ret = 1; | 2554 | ret = read; |
2456 | 2555 | ||
2457 | /* update the entry counter */ | ||
2458 | rb_remove_entries(cpu_buffer, bpage); | ||
2459 | out: | 2556 | out: |
2460 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2557 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2461 | 2558 | ||
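ring_buffer_read_page() gains a length argument and new return semantics: a negative value means nothing was transferred, while a non-negative value is the offset of the consumed data in the returned page. The page is swapped out whole when possible; otherwise events are copied one at a time into the caller's page, carrying the read timestamp along. The updated kerneldoc example, expanded into a hedged sketch; process_page() is the kerneldoc's own placeholder:

	void *rpage;
	int ret;

	rpage = ring_buffer_alloc_read_page(buffer);
	if (!rpage)
		return -ENOMEM;

	ret = ring_buffer_read_page(buffer, &rpage, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		process_page(rpage, ret);	/* data begins at offset 'ret' */

	ring_buffer_free_read_page(buffer, rpage);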
@@ -2466,7 +2563,7 @@ static ssize_t | |||
2466 | rb_simple_read(struct file *filp, char __user *ubuf, | 2563 | rb_simple_read(struct file *filp, char __user *ubuf, |
2467 | size_t cnt, loff_t *ppos) | 2564 | size_t cnt, loff_t *ppos) |
2468 | { | 2565 | { |
2469 | long *p = filp->private_data; | 2566 | unsigned long *p = filp->private_data; |
2470 | char buf[64]; | 2567 | char buf[64]; |
2471 | int r; | 2568 | int r; |
2472 | 2569 | ||
@@ -2482,9 +2579,9 @@ static ssize_t | |||
2482 | rb_simple_write(struct file *filp, const char __user *ubuf, | 2579 | rb_simple_write(struct file *filp, const char __user *ubuf, |
2483 | size_t cnt, loff_t *ppos) | 2580 | size_t cnt, loff_t *ppos) |
2484 | { | 2581 | { |
2485 | long *p = filp->private_data; | 2582 | unsigned long *p = filp->private_data; |
2486 | char buf[64]; | 2583 | char buf[64]; |
2487 | long val; | 2584 | unsigned long val; |
2488 | int ret; | 2585 | int ret; |
2489 | 2586 | ||
2490 | if (cnt >= sizeof(buf)) | 2587 | if (cnt >= sizeof(buf)) |
@@ -2509,7 +2606,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
2509 | return cnt; | 2606 | return cnt; |
2510 | } | 2607 | } |
2511 | 2608 | ||
2512 | static struct file_operations rb_simple_fops = { | 2609 | static const struct file_operations rb_simple_fops = { |
2513 | .open = tracing_open_generic, | 2610 | .open = tracing_open_generic, |
2514 | .read = rb_simple_read, | 2611 | .read = rb_simple_read, |
2515 | .write = rb_simple_write, | 2612 | .write = rb_simple_write, |