author		Bjoern Brandenburg <bbb@mpi-sws.org>	2013-06-23 08:40:52 -0400
committer	Bjoern Brandenburg <bbb@mpi-sws.org>	2014-06-07 05:30:26 -0400
commit		c9330b62dc1eb3cf964793447200b2395e3e8e16
tree		37604bd60ed4df988fea5578d6942d06307503aa
parent		9ef9be8fed9471eaabd10dd1ba25514729ec1408
Feather-Trace: add LITMUS^RT overhead tracing infrastructure
This patch adds the main infrastructure for tracing overheads in
LITMUS^RT. It does not yet introduce any tracepoints into the kernel.
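The tracing macros introduced below are meant to be placed in matching pairs around the code segments whose overhead is to be measured. The following sketch is purely illustrative, since this patch adds no call sites yet; the function and placement are hypothetical, but the macros are the ones defined in include/litmus/trace.h:

/* Hypothetical instrumentation, for illustration only: a later patch could
 * wrap the scheduling decision and the context switch in matching START/END
 * events (ids 100/101 and 104/105 below). */
#include <litmus/trace.h>

static void example_schedule_path(struct task_struct *next)
{
	TS_SCHED_START;		/* event 100: scheduling decision begins */
	/* ... plugin picks the next task ... */
	TS_SCHED_END(next);	/* event 101: decision made, next task known */

	TS_CXS_START(next);	/* event 104: context switch begins */
	/* ... low-level switch to next ... */
	TS_CXS_END(next);	/* event 105: context switch complete */
}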
-rw-r--r--	include/litmus/trace.h	142
-rw-r--r--	litmus/Kconfig		 25
-rw-r--r--	litmus/Makefile		  1
-rw-r--r--	litmus/trace.c		562
4 files changed, 730 insertions, 0 deletions
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 000000000000..601787214037
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,142 @@
+#ifndef _SYS_TRACE_H_
+#define _SYS_TRACE_H_
+
+#ifdef CONFIG_SCHED_OVERHEAD_TRACE
+
+
+#include <litmus/feather_trace.h>
+#include <litmus/feather_buffer.h>
+
+
+/*********************** TIMESTAMPS ************************/
+
+enum task_type_marker {
+	TSK_BE,
+	TSK_RT,
+	TSK_UNKNOWN
+};
+
+struct timestamp {
+	uint64_t		timestamp:48;
+	uint64_t		pid:16;
+	uint32_t		seq_no;
+	uint8_t			cpu;
+	uint8_t			event;
+	uint8_t			task_type:2;
+	uint8_t			irq_flag:1;
+	uint8_t			irq_count:5;
+};
+
+/* tracing callbacks */
+feather_callback void msg_sent(unsigned long event, unsigned long to);
+feather_callback void msg_received(unsigned long event);
+
+#define MSG_TIMESTAMP_SENT(id, to) \
+	ft_event1(id, msg_sent, (unsigned long) to);
+
+#define MSG_TIMESTAMP_RECEIVED(id) \
+	ft_event0(id, msg_received);
+
+feather_callback void save_cpu_timestamp(unsigned long event);
+feather_callback void save_cpu_timestamp_time(unsigned long event, unsigned long time_ptr);
+feather_callback void save_cpu_timestamp_irq(unsigned long event, unsigned long irq_count_ptr);
+feather_callback void save_cpu_timestamp_task(unsigned long event, unsigned long t_ptr);
+feather_callback void save_cpu_timestamp_def(unsigned long event, unsigned long type);
+feather_callback void save_cpu_task_latency(unsigned long event, unsigned long when_ptr);
+
+#define CPU_TIMESTAMP_TIME(id, time_ptr) \
+	ft_event1(id, save_cpu_timestamp_time, (unsigned long) time_ptr)
+
+#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr) \
+	ft_event1(id, save_cpu_timestamp_irq, (unsigned long) irq_count_ptr)
+
+#define CPU_TIMESTAMP(id) ft_event0(id, save_cpu_timestamp)
+
+#define CPU_DTIMESTAMP(id, def) ft_event1(id, save_cpu_timestamp_def, (unsigned long) def)
+
+#define CPU_TIMESTAMP_CUR(id) CPU_DTIMESTAMP(id, is_realtime(current) ? TSK_RT : TSK_BE)
+
+#define CPU_TTIMESTAMP(id, task) \
+	ft_event1(id, save_cpu_timestamp_task, (unsigned long) task)
+
+#define CPU_LTIMESTAMP(id, task) \
+	ft_event1(id, save_cpu_task_latency, (unsigned long) task)
+
+#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
+
+#define MSG_TIMESTAMP_SENT(id, to)
+#define MSG_TIMESTAMP_RECEIVED(id)
+
+#define CPU_TIMESTAMP_TIME(id, time_ptr)
+#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr)
+#define CPU_TIMESTAMP(id)
+#define CPU_DTIMESTAMP(id, def)
+#define CPU_TIMESTAMP_CUR(id)
+#define CPU_TTIMESTAMP(id, task)
+#define CPU_LTIMESTAMP(id, task)
+
+#endif
+
+
+/* Convention for timestamps
+ * =========================
+ *
+ * In order to process the trace files with a common tool, we use the following
+ * convention to measure execution times: The end time id of a code segment is
+ * always the next number after the start time event id.
+ */
+
+#define __TS_SYSCALL_IN_START(p)	CPU_TIMESTAMP_TIME(10, p)
+#define __TS_SYSCALL_IN_END(p)		CPU_TIMESTAMP_IRQ(11, p)
+
+#define TS_SYSCALL_OUT_START		CPU_TIMESTAMP_CUR(20)
+#define TS_SYSCALL_OUT_END		CPU_TIMESTAMP_CUR(21)
+
+#define TS_LOCK_START			CPU_TIMESTAMP_CUR(30)
+#define TS_LOCK_END			CPU_TIMESTAMP_CUR(31)
+
+#define TS_LOCK_SUSPEND			CPU_TIMESTAMP_CUR(38)
+#define TS_LOCK_RESUME			CPU_TIMESTAMP_CUR(39)
+
+#define TS_UNLOCK_START			CPU_TIMESTAMP_CUR(40)
+#define TS_UNLOCK_END			CPU_TIMESTAMP_CUR(41)
+
+#define TS_SCHED_START			CPU_DTIMESTAMP(100, TSK_UNKNOWN) /* we only
+									  * care
+									  * about
+									  * next */
+#define TS_SCHED_END(t)			CPU_TTIMESTAMP(101, t)
+#define TS_SCHED2_START(t)		CPU_TTIMESTAMP(102, t)
+#define TS_SCHED2_END(t)		CPU_TTIMESTAMP(103, t)
+
+#define TS_CXS_START(t)			CPU_TTIMESTAMP(104, t)
+#define TS_CXS_END(t)			CPU_TTIMESTAMP(105, t)
+
+#define TS_RELEASE_START		CPU_DTIMESTAMP(106, TSK_RT)
+#define TS_RELEASE_END			CPU_DTIMESTAMP(107, TSK_RT)
+
+#define TS_TICK_START(t)		CPU_TTIMESTAMP(110, t)
+#define TS_TICK_END(t)			CPU_TTIMESTAMP(111, t)
+
+#define TS_QUANTUM_BOUNDARY_START	CPU_TIMESTAMP_CUR(112)
+#define TS_QUANTUM_BOUNDARY_END		CPU_TIMESTAMP_CUR(113)
+
+
+#define TS_PLUGIN_SCHED_START		/* TIMESTAMP(120) */  /* currently unused */
+#define TS_PLUGIN_SCHED_END		/* TIMESTAMP(121) */
+
+#define TS_PLUGIN_TICK_START		/* TIMESTAMP(130) */
+#define TS_PLUGIN_TICK_END		/* TIMESTAMP(131) */
+
+#define TS_ENTER_NP_START		CPU_TIMESTAMP(140)
+#define TS_ENTER_NP_END			CPU_TIMESTAMP(141)
+
+#define TS_EXIT_NP_START		CPU_TIMESTAMP(150)
+#define TS_EXIT_NP_END			CPU_TIMESTAMP(151)
+
+#define TS_SEND_RESCHED_START(c)	MSG_TIMESTAMP_SENT(190, c)
+#define TS_SEND_RESCHED_END		MSG_TIMESTAMP_RECEIVED(191)
+
+#define TS_RELEASE_LATENCY(when)	CPU_LTIMESTAMP(208, &(when))
+
+#endif /* !_SYS_TRACE_H_ */
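The start/end convention above (an end event's id is always the start event's id plus one, and the start ids listed are even) is what lets a single post-processing tool compute any overhead generically from the 16-byte records. A rough userspace sketch of such pairing follows; it is not part of the patch, it ignores CPU and seq_no interleaving for brevity, it assumes it is compiled for the same ABI as the kernel that wrote the records, and it simply reads raw records from stdin (e.g. a dump obtained via ftcat):

#include <stdio.h>
#include <stdint.h>

/* Mirrors the 16-byte record defined in include/litmus/trace.h. */
struct timestamp {
	uint64_t timestamp:48;
	uint64_t pid:16;
	uint32_t seq_no;
	uint8_t  cpu;
	uint8_t  event;
	uint8_t  task_type:2;
	uint8_t  irq_flag:1;
	uint8_t  irq_count:5;
};

int main(void)
{
	struct timestamp ts;
	uint64_t start[256] = { 0 };	/* most recent start timestamp per event id */

	while (fread(&ts, sizeof(ts), 1, stdin) == 1) {
		if (ts.event % 2 == 0) {
			/* even id: start of a measured segment */
			start[ts.event] = ts.timestamp;
		} else if (start[ts.event - 1]) {
			/* odd id: end event, matches start id + 1 */
			printf("events %d->%d: %llu cycles\n",
			       ts.event - 1, ts.event,
			       (unsigned long long)
			       (ts.timestamp - start[ts.event - 1]));
		}
	}
	return 0;
}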
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 7456eb209cf7..0c7e06b922a1 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -24,6 +24,31 @@ config FEATHER_TRACE
 	  you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
 	  actually enable any events.
 
+config SCHED_OVERHEAD_TRACE
+	bool "Record timestamps for overhead measurements"
+	depends on FEATHER_TRACE
+	default y
+	help
+	  Export event stream for overhead tracing.
+	  Say Yes for overhead tracing.
+
+config SCHED_OVERHEAD_TRACE_SHIFT
+	int "Buffer size for Feather-Trace overhead data"
+	depends on SCHED_OVERHEAD_TRACE
+	range 15 32
+	default 22
+	help
+
+	  Select the buffer size for the Feather-Trace overhead tracing
+	  infrastructure (/dev/litmus/ft_trace0 & ftcat) as a power of two. The
+	  larger the buffer, the lower the chance of buffer overflows if
+	  the ftcat process is starved by real-time activity. In machines with
+	  large memories, large buffer sizes are recommended.
+
+	  Examples: 16 => 2 MB
+		    24 => 512 MB
+		    26 => 2 GB
+
 endmenu
 
 endmenu
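The example sizes in the help text follow from the buffer geometry used in litmus/trace.c below: each minor device holds NO_TIMESTAMPS = (2 << shift) records of 16 bytes, i.e. 2^(shift + 5) bytes. A quick stand-alone check of that arithmetic (illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
	/* shift values from the Kconfig help text, plus the default of 22 */
	int shifts[] = { 16, 22, 24, 26 };

	for (int i = 0; i < 4; i++) {
		/* (2 << shift) timestamps, 16 bytes each */
		unsigned long long bytes = (2ULL << shifts[i]) * 16;
		printf("shift %2d -> %4llu MiB\n", shifts[i], bytes >> 20);
	}
	return 0;
}

With the default of 22 this comes to 128 MiB per minor device; the allocation loop in alloc_timestamp_buffer() below halves the record count on failure, so smaller buffers are used automatically when memory is tight.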
diff --git a/litmus/Makefile b/litmus/Makefile
index bca61e6deb71..99f90c34f141 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
+obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 000000000000..a3786232ae63
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,562 @@
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include <litmus/ftdev.h>
+#include <litmus/trace.h>
+
+/* dummy definition of is_realtime() */
+#define is_realtime(t) (0)
+
+/******************************************************************************/
+/*                                 Allocation                                 */
+/******************************************************************************/
+
+static struct ftdev cpu_overhead_dev;
+static struct ftdev msg_overhead_dev;
+
+#define cpu_trace_ts_buf(cpu) cpu_overhead_dev.minor[(cpu)].buf
+#define msg_trace_ts_buf(cpu) msg_overhead_dev.minor[(cpu)].buf
+
+DEFINE_PER_CPU(atomic_t, irq_fired_count);
+DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, cpu_irq_fired_count);
+
+static DEFINE_PER_CPU(unsigned int, cpu_ts_seq_no);
+static DEFINE_PER_CPU(unsigned int, msg_ts_seq_no);
+
+static int64_t cycle_offset[NR_CPUS][NR_CPUS];
+
+void ft_irq_fired(void)
+{
+	/* Only called with preemptions disabled. */
+	atomic_inc(&__get_cpu_var(irq_fired_count));
+	atomic_inc(&__get_cpu_var(cpu_irq_fired_count));
+}
+
+static inline void clear_irq_fired(void)
+{
+	atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
+}
+
+static inline unsigned int get_and_clear_irq_fired(void)
+{
+	/* This is potentially not atomic since we might migrate if
+	 * preemptions are not disabled. As a tradeoff between
+	 * accuracy and tracing overheads, this seems acceptable.
+	 * If it proves to be a problem, then one could add a callback
+	 * from the migration code to invalidate irq_fired_count.
+	 */
+	return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
+}
+
+static inline unsigned int get_and_clear_irq_fired_for_cpu(int cpu)
+{
+	return atomic_xchg(&per_cpu(irq_fired_count, cpu), 0);
+}
+
+static inline void cpu_clear_irq_fired(void)
+{
+	atomic_set(&__raw_get_cpu_var(cpu_irq_fired_count), 0);
+}
+
+static inline unsigned int cpu_get_and_clear_irq_fired(void)
+{
+	return atomic_xchg(&__raw_get_cpu_var(cpu_irq_fired_count), 0);
+}
+
+static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count)
+{
+	/* Store how many interrupts occurred. */
+	ts->irq_count = irq_count;
+	/* Extra flag because ts->irq_count overflows quickly. */
+	ts->irq_flag = irq_count > 0;
+}
+
+#define NO_IRQ_COUNT 0
+#define LOCAL_IRQ_COUNT 1
+#define REMOTE_IRQ_COUNT 2
+
+#define DO_NOT_RECORD_TIMESTAMP 0
+#define RECORD_LOCAL_TIMESTAMP 1
+#define RECORD_OFFSET_TIMESTAMP 2
+
+static inline void __write_record(
+	uint8_t event,
+	uint8_t type,
+	uint16_t pid_fragment,
+	unsigned int irq_count,
+	int record_irq,
+	int hide_irq,
+	uint64_t timestamp,
+	int record_timestamp,
+
+	int only_single_writer,
+	int is_cpu_timestamp,
+	int local_cpu,
+	uint8_t other_cpu)
+{
+	unsigned long flags;
+	unsigned int seq_no;
+	struct timestamp *ts;
+	int cpu;
+	struct ft_buffer* buf;
+
+	/* Avoid preemptions while recording the timestamp. This reduces the
+	 * number of "out of order" timestamps in the stream and makes
+	 * post-processing easier. */
+
+	local_irq_save(flags);
+
+	if (local_cpu)
+		cpu = smp_processor_id();
+	else
+		cpu = other_cpu;
+
+	/* resolved during function inlining */
+	if (is_cpu_timestamp) {
+		seq_no = __get_cpu_var(cpu_ts_seq_no)++;
+		buf = cpu_trace_ts_buf(cpu);
+	} else {
+		seq_no = fetch_and_inc((int *) &per_cpu(msg_ts_seq_no, cpu));
+		buf = msg_trace_ts_buf(cpu);
+	}
+
+	/* If buf is non-NULL here, then the buffer cannot be deallocated until
+	 * we turn interrupts on again. This is because free_timestamp_buffer()
+	 * indirectly causes TLB invalidations due to modifications of the
+	 * kernel address space, namely via vfree() in free_ft_buffer(), which
+	 * cannot be processed until we turn on interrupts again.
+	 */
+
+	if (buf &&
+	    (only_single_writer /* resolved during function inlining */
+	     ? ft_buffer_start_single_write(buf, (void**) &ts)
+	     : ft_buffer_start_write(buf, (void**) &ts))) {
+		ts->event = event;
+		ts->seq_no = seq_no;
+
+		ts->task_type = type;
+		ts->pid = pid_fragment;
+
+		ts->cpu = cpu;
+
+		if (record_irq) {
+			if (local_cpu)
+				irq_count = cpu_get_and_clear_irq_fired();
+			else
+				irq_count = get_and_clear_irq_fired_for_cpu(cpu);
+		}
+
+		save_irq_flags(ts, irq_count - hide_irq);
+
+		if (record_timestamp)
+			timestamp = ft_timestamp();
+		if (record_timestamp == RECORD_OFFSET_TIMESTAMP)
+			timestamp += cycle_offset[smp_processor_id()][cpu];
+
+		ts->timestamp = timestamp;
+		ft_buffer_finish_write(buf, ts);
+	}
+
+	local_irq_restore(flags);
+}
+
+
+static inline void write_cpu_timestamp(
+	uint8_t event,
+	uint8_t type,
+	uint16_t pid_fragment,
+	unsigned int irq_count,
+	int record_irq,
+	int hide_irq,
+	uint64_t timestamp,
+	int record_timestamp)
+{
+	__write_record(event, type,
+		       pid_fragment,
+		       irq_count, record_irq, hide_irq,
+		       timestamp, record_timestamp,
+		       1 /* only_single_writer */,
+		       1 /* is_cpu_timestamp */,
+		       1 /* local_cpu */,
+		       0xff /* other_cpu */);
+}
+
+static inline void save_msg_timestamp(
+	uint8_t event,
+	int hide_irq)
+{
+	struct task_struct *t = current;
+	__write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
+		       t->pid,
+		       0, LOCAL_IRQ_COUNT, hide_irq,
+		       0, RECORD_LOCAL_TIMESTAMP,
+		       0 /* only_single_writer */,
+		       0 /* is_cpu_timestamp */,
+		       1 /* local_cpu */,
+		       0xff /* other_cpu */);
+}
+
+static inline void save_remote_msg_timestamp(
+	uint8_t event,
+	uint8_t remote_cpu)
+{
+	struct task_struct *t = current;
+	__write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
+		       t->pid,
+		       0, REMOTE_IRQ_COUNT, 0,
+		       0, RECORD_OFFSET_TIMESTAMP,
+		       0 /* only_single_writer */,
+		       0 /* is_cpu_timestamp */,
+		       0 /* local_cpu */,
+		       remote_cpu);
+}
+
+feather_callback void save_cpu_timestamp_def(unsigned long event,
+					     unsigned long type)
+{
+	write_cpu_timestamp(event, type,
+			    current->pid,
+			    0, LOCAL_IRQ_COUNT, 0,
+			    0, RECORD_LOCAL_TIMESTAMP);
+}
+
+feather_callback void save_cpu_timestamp_task(unsigned long event,
+					      unsigned long t_ptr)
+{
+	struct task_struct *t = (struct task_struct *) t_ptr;
+	int rt = is_realtime(t);
+
+	write_cpu_timestamp(event, rt ? TSK_RT : TSK_BE,
+			    t->pid,
+			    0, LOCAL_IRQ_COUNT, 0,
+			    0, RECORD_LOCAL_TIMESTAMP);
+}
+
+feather_callback void save_cpu_task_latency(unsigned long event,
+					    unsigned long when_ptr)
+{
+	lt_t now = litmus_clock();
+	lt_t *when = (lt_t*) when_ptr;
+
+	write_cpu_timestamp(event, TSK_RT,
+			    0,
+			    0, LOCAL_IRQ_COUNT, 0,
+			    now - *when, DO_NOT_RECORD_TIMESTAMP);
+}
+
+/* fake timestamp to user-reported time */
+feather_callback void save_cpu_timestamp_time(unsigned long event,
+					      unsigned long ptr)
+{
+	uint64_t* time = (uint64_t*) ptr;
+
+	write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
+			    current->pid,
+			    0, LOCAL_IRQ_COUNT, 0,
+			    *time, DO_NOT_RECORD_TIMESTAMP);
+}
+
+/* Record user-reported IRQ count */
+feather_callback void save_cpu_timestamp_irq(unsigned long event,
+					     unsigned long irq_counter_ptr)
+{
+	uint64_t* irqs = (uint64_t*) irq_counter_ptr;
+
+	write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
+			    current->pid,
+			    *irqs, NO_IRQ_COUNT, 0,
+			    0, RECORD_LOCAL_TIMESTAMP);
+}
+
+
+feather_callback void msg_sent(unsigned long event, unsigned long to)
+{
+	save_remote_msg_timestamp(event, to);
+}
+
+/* Suppresses one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is
+ * called from within an interrupt that is expected. */
+feather_callback void msg_received(unsigned long event)
+{
+	save_msg_timestamp(event, 1);
+}
+
+static void __add_timestamp_user(struct timestamp *pre_recorded)
+{
+	unsigned long flags;
+	unsigned int seq_no;
+	struct timestamp *ts;
+	struct ft_buffer* buf;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	buf = cpu_trace_ts_buf(cpu);
+
+	seq_no = __get_cpu_var(cpu_ts_seq_no)++;
+	if (buf && ft_buffer_start_single_write(buf, (void**) &ts)) {
+		*ts = *pre_recorded;
+		ts->seq_no = seq_no;
+		ts->cpu = raw_smp_processor_id();
+		save_irq_flags(ts, get_and_clear_irq_fired());
+		ft_buffer_finish_write(buf, ts);
+	}
+
+	local_irq_restore(flags);
+}
+
+/******************************************************************************/
+/*                            DEVICE FILE DRIVER                              */
+/******************************************************************************/
+
+struct calibrate_info {
+	atomic_t ready;
+
+	uint64_t cycle_count;
+};
+
+static void calibrate_helper(void *_info)
+{
+	struct calibrate_info *info = _info;
+	/* check in with master */
+	atomic_inc(&info->ready);
+
+	/* wait for master to signal start */
+	while (atomic_read(&info->ready))
+		cpu_relax();
+
+	/* report time stamp */
+	info->cycle_count = ft_timestamp();
+
+	/* tell master that we are done */
+	atomic_inc(&info->ready);
+}
+
+
+static int64_t calibrate_cpu(int cpu)
+{
+	uint64_t cycles;
+	struct calibrate_info info;
+	unsigned long flags;
+	int64_t delta;
+
+	atomic_set(&info.ready, 0);
+	info.cycle_count = 0;
+	smp_wmb();
+
+	smp_call_function_single(cpu, calibrate_helper, &info, 0);
+
+	/* wait for helper to become active */
+	while (!atomic_read(&info.ready))
+		cpu_relax();
+
+	/* avoid interrupt interference */
+	local_irq_save(flags);
+
+	/* take measurement */
+	atomic_set(&info.ready, 0);
+	smp_wmb();
+	cycles = ft_timestamp();
+
+	/* wait for helper reading */
+	while (!atomic_read(&info.ready))
+		cpu_relax();
+
+	/* positive offset: the other guy is ahead of us */
+	delta = (int64_t) info.cycle_count;
+	delta -= (int64_t) cycles;
+
+	local_irq_restore(flags);
+
+	return delta;
+}
+
+#define NUM_SAMPLES 10
+
+static long calibrate_tsc_offsets(struct ftdev* ftdev, unsigned int idx,
+				  unsigned long uarg)
+{
+	int cpu, self, i;
+	int64_t delta, sample;
+
+	preempt_disable();
+	self = smp_processor_id();
+
+	if (uarg)
+		printk(KERN_INFO "Feather-Trace: determining TSC offsets for P%d\n", self);
+
+	for_each_online_cpu(cpu)
+		if (cpu != self) {
+			delta = calibrate_cpu(cpu);
+			for (i = 1; i < NUM_SAMPLES; i++) {
+				sample = calibrate_cpu(cpu);
+				delta = sample < delta ? sample : delta;
+			}
+
+			cycle_offset[self][cpu] = delta;
+
+			if (uarg)
+				printk(KERN_INFO "Feather-Trace: TSC offset for P%d->P%d is %lld cycles.\n",
+				       self, cpu, cycle_offset[self][cpu]);
+		}
+
+	preempt_enable();
+	return 0;
+}
+
+#define NO_TIMESTAMPS (2 << CONFIG_SCHED_OVERHEAD_TRACE_SHIFT)
+
+static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
+{
+	unsigned int count = NO_TIMESTAMPS;
+
+	/* An overhead-tracing timestamp should be exactly 16 bytes long. */
+	BUILD_BUG_ON(sizeof(struct timestamp) != 16);
+
+	while (count && !ftdev->minor[idx].buf) {
+		printk("time stamp buffer: trying to allocate %u time stamps for minor=%u.\n", count, idx);
+		ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
+		count /= 2;
+	}
+	return ftdev->minor[idx].buf ? 0 : -ENOMEM;
+}
+
+static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
+{
+	struct ft_buffer* tmp = ftdev->minor[idx].buf;
+
+	ftdev->minor[idx].buf = NULL;
+	/* Make sure all cores have actually seen buf == NULL before
+	 * yanking out the mappings from underneath them. */
+	smp_wmb();
+	free_ft_buffer(tmp);
+}
+
+static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
+					 const char __user *from)
+{
+	ssize_t consumed = 0;
+	struct timestamp ts;
+
+	/* don't give us partial timestamps */
+	if (len % sizeof(ts))
+		return -EINVAL;
+
+	while (len >= sizeof(ts)) {
+		if (copy_from_user(&ts, from, sizeof(ts))) {
+			consumed = -EFAULT;
+			goto out;
+		}
+		len -= sizeof(ts);
+		from += sizeof(ts);
+		consumed += sizeof(ts);
+
+		/* Note: this always adds to the buffer of the CPU-local
+		 * device, not necessarily to the device that the system call
+		 * was invoked on. This is admittedly a bit ugly, but requiring
+		 * tasks to only write to the appropriate device would make
+		 * tracing from userspace under global and clustered scheduling
+		 * exceedingly difficult. Writing to remote buffers would
+		 * require to not use ft_buffer_start_single_write(), which we
+		 * want to do to reduce the number of atomic ops in the common
+		 * case (which is the recording of CPU-local scheduling
+		 * overheads).
+		 */
+		__add_timestamp_user(&ts);
+	}
+
+out:
+	return consumed;
+}
+
+static int __init init_cpu_ft_overhead_trace(void)
+{
+	int err, cpu;
+
+	printk("Initializing Feather-Trace per-cpu overhead tracing device.\n");
+	err = ftdev_init(&cpu_overhead_dev, THIS_MODULE,
+			 num_online_cpus(), "ft_cpu_trace");
+	if (err)
+		goto err_out;
+
+	cpu_overhead_dev.alloc = alloc_timestamp_buffer;
+	cpu_overhead_dev.free  = free_timestamp_buffer;
+	cpu_overhead_dev.write = write_timestamp_from_user;
+
+	err = register_ftdev(&cpu_overhead_dev);
+	if (err)
+		goto err_dealloc;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		per_cpu(cpu_ts_seq_no, cpu) = 0;
+	}
+
+	return 0;
+
+err_dealloc:
+	ftdev_exit(&cpu_overhead_dev);
+err_out:
+	printk(KERN_WARNING "Could not register per-cpu ft_trace device.\n");
+	return err;
+}
+
+static int __init init_msg_ft_overhead_trace(void)
+{
+	int err, cpu;
+
+	printk("Initializing Feather-Trace per-cpu message overhead tracing device.\n");
+	err = ftdev_init(&msg_overhead_dev, THIS_MODULE,
+			 num_online_cpus(), "ft_msg_trace");
+	if (err)
+		goto err_out;
+
+	msg_overhead_dev.alloc = alloc_timestamp_buffer;
+	msg_overhead_dev.free  = free_timestamp_buffer;
+	msg_overhead_dev.calibrate = calibrate_tsc_offsets;
+
+	err = register_ftdev(&msg_overhead_dev);
+	if (err)
+		goto err_dealloc;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		per_cpu(msg_ts_seq_no, cpu) = 0;
+	}
+
+	return 0;
+
+err_dealloc:
+	ftdev_exit(&msg_overhead_dev);
+err_out:
+	printk(KERN_WARNING "Could not register message ft_trace device.\n");
+	return err;
+}
+
+
+static int __init init_ft_overhead_trace(void)
+{
+	int err, i, j;
+
+	for (i = 0; i < NR_CPUS; i++)
+		for (j = 0; j < NR_CPUS; j++)
+			cycle_offset[i][j] = 0;
+
+	err = init_cpu_ft_overhead_trace();
+	if (err)
+		return err;
+
+	err = init_msg_ft_overhead_trace();
+	if (err) {
+		ftdev_exit(&cpu_overhead_dev);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit exit_ft_overhead_trace(void)
+{
+	ftdev_exit(&cpu_overhead_dev);
+	ftdev_exit(&msg_overhead_dev);
+}
+
+module_init(init_ft_overhead_trace);
+module_exit(exit_ft_overhead_trace);
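For reference, the two-step handshake used by calibrate_cpu() above can be illustrated in userspace: the helper checks in, the master signals the start by resetting the flag, both sides read the cycle counter as closely together as possible, and the helper checks in again. The sketch below mirrors that protocol with C11 atomics and pthreads; it is illustrative only (x86-specific __rdtsc(), no CPU pinning, and none of the interrupt masking the kernel version needs):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include <x86intrin.h>

struct calibrate_info {
	atomic_int ready;
	uint64_t cycle_count;
};

static void *calibrate_helper(void *arg)
{
	struct calibrate_info *info = arg;

	atomic_fetch_add(&info->ready, 1);	/* check in with master      */
	while (atomic_load(&info->ready))	/* wait for "go" (ready == 0) */
		;
	info->cycle_count = __rdtsc();		/* report local time stamp   */
	atomic_fetch_add(&info->ready, 1);	/* tell master we are done   */
	return NULL;
}

int main(void)
{
	struct calibrate_info info = { .ready = 0, .cycle_count = 0 };
	pthread_t helper;
	uint64_t cycles;

	pthread_create(&helper, NULL, calibrate_helper, &info);
	while (!atomic_load(&info.ready))	/* wait for helper to check in */
		;
	atomic_store(&info.ready, 0);		/* signal start               */
	cycles = __rdtsc();			/* take local measurement     */
	while (!atomic_load(&info.ready))	/* wait for helper's reading  */
		;
	pthread_join(helper, NULL);

	/* positive offset: the helper's clock is ahead of ours */
	printf("offset: %lld cycles\n",
	       (long long)(info.cycle_count - cycles));
	return 0;
}

In the kernel, calibrate_tsc_offsets() repeats this measurement NUM_SAMPLES times per remote CPU and keeps the smallest offset, i.e. the run least disturbed by other activity.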