author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:45 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2017-05-26 17:12:21 -0400
commit    d13b06b1c18565476b0849ecd032aaf6d326b877 (patch)
tree      7b9b606c8c433428587e889f82dade7da89ba721 /litmus/trace.c
parent    d0ec47c1fd203fa87071fda00f8cf0a1826d6a34 (diff)
Feather-Trace: add LITMUS^RT overhead tracing infrastructure
This patch adds the main infrastructure for tracing overheads in LITMUS^RT. It does not yet introduce any tracepoints into the kernel.

Add new overhead tracing event IDs:
- XCALL_{START,END} -- cross-CPU function calls
- SEND_XCALL_{START,END} -- cross-CPU function call latency
- SCHED_TIMER_{START,END} -- any additional, plugin-specific scheduling timer
- TIMER_LATENCY -- latency of a plugin-specific timer

Feather-Trace: add macros for message-passing timestamps

Add timestamp tracing macros that reverse on which cores the timestamps are recorded. This is intended to reduce tracing contention if some cores receive a lot of messages. These macros are useful for plugins based on message passing, such as the (non-mainline) G-EDF-MP.

Feather-Trace: convert timestamps to atomic_t
- patch by Namhoon Kim (UNC)
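As a rough usage illustration (not part of this patch, which introduces no tracepoints yet), a plugin-specific timer handler could later be bracketed with the new SCHED_TIMER events. The TS_SCHED_TIMER_{START,END} wrapper names below are hypothetical and merely assumed to follow the existing TS_* macro convention of litmus/trace.h:

#include <linux/hrtimer.h>
#include <litmus/trace.h>

/* Hypothetical sketch: instrumenting a plugin-specific timer with the new
 * SCHED_TIMER_{START,END} event IDs. TS_SCHED_TIMER_START/END are assumed
 * wrapper names; this patch itself defines only the event IDs. */
static enum hrtimer_restart my_plugin_timer(struct hrtimer *timer)
{
        TS_SCHED_TIMER_START;   /* overhead measurement begins on this CPU */

        /* ... plugin-specific scheduling work ... */

        TS_SCHED_TIMER_END;     /* overhead measurement ends on this CPU */
        return HRTIMER_NORESTART;
}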
Diffstat (limited to 'litmus/trace.c')
-rw-r--r--  litmus/trace.c  562
1 file changed, 562 insertions, 0 deletions
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 000000000000..64d63943f65d
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,562 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/uaccess.h>

#include <litmus/ftdev.h>
#include <litmus/trace.h>

/* dummy definition of is_realtime() */
#define is_realtime(t) (0)

/******************************************************************************/
/*                                 Allocation                                 */
/******************************************************************************/

static struct ftdev cpu_overhead_dev;
static struct ftdev msg_overhead_dev;

#define cpu_trace_ts_buf(cpu) cpu_overhead_dev.minor[(cpu)].buf
#define msg_trace_ts_buf(cpu) msg_overhead_dev.minor[(cpu)].buf

DEFINE_PER_CPU(atomic_t, irq_fired_count);
DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, cpu_irq_fired_count);

static DEFINE_PER_CPU(unsigned int, cpu_ts_seq_no);
static DEFINE_PER_CPU(atomic_t, msg_ts_seq_no);

static int64_t cycle_offset[NR_CPUS][NR_CPUS];

void ft_irq_fired(void)
{
        /* Only called with preemptions disabled. */
        atomic_inc(this_cpu_ptr(&irq_fired_count));
        atomic_inc(this_cpu_ptr(&cpu_irq_fired_count));
}

static inline void clear_irq_fired(void)
{
        atomic_set(raw_cpu_ptr(&irq_fired_count), 0);
}

static inline unsigned int get_and_clear_irq_fired(void)
{
        /* This is potentially not atomic since we might migrate if
         * preemptions are not disabled. As a tradeoff between
         * accuracy and tracing overheads, this seems acceptable.
         * If it proves to be a problem, then one could add a callback
         * from the migration code to invalidate irq_fired_count.
         */
        return atomic_xchg(raw_cpu_ptr(&irq_fired_count), 0);
}

static inline unsigned int get_and_clear_irq_fired_for_cpu(int cpu)
{
        return atomic_xchg(&per_cpu(irq_fired_count, cpu), 0);
}

static inline void cpu_clear_irq_fired(void)
{
        atomic_set(raw_cpu_ptr(&cpu_irq_fired_count), 0);
}

static inline unsigned int cpu_get_and_clear_irq_fired(void)
{
        return atomic_xchg(raw_cpu_ptr(&cpu_irq_fired_count), 0);
}

static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count)
{
        /* Store how many interrupts occurred. */
        ts->irq_count = irq_count;
        /* Extra flag because ts->irq_count overflows quickly. */
        ts->irq_flag = irq_count > 0;
}

#define NO_IRQ_COUNT 0
#define LOCAL_IRQ_COUNT 1
#define REMOTE_IRQ_COUNT 2

#define DO_NOT_RECORD_TIMESTAMP 0
#define RECORD_LOCAL_TIMESTAMP 1
#define RECORD_OFFSET_TIMESTAMP 2

static inline void __write_record(
        uint8_t event,
        uint8_t type,
        uint16_t pid_fragment,
        unsigned int irq_count,
        int record_irq,
        int hide_irq,
        uint64_t timestamp,
        int record_timestamp,

        int only_single_writer,
        int is_cpu_timestamp,
        int local_cpu,
        uint8_t other_cpu)
{
        unsigned long flags;
        unsigned int seq_no;
        struct timestamp *ts;
        int cpu;
        struct ft_buffer* buf;

        /* Avoid preemptions while recording the timestamp. This reduces the
         * number of "out of order" timestamps in the stream and makes
         * post-processing easier. */

        local_irq_save(flags);

        if (local_cpu)
                cpu = smp_processor_id();
        else
                cpu = other_cpu;

        /* resolved during function inlining */
        if (is_cpu_timestamp) {
                seq_no = __this_cpu_inc_return(cpu_ts_seq_no);
                buf = cpu_trace_ts_buf(cpu);
        } else {
                seq_no = atomic_fetch_inc(&per_cpu(msg_ts_seq_no, cpu));
                buf = msg_trace_ts_buf(cpu);
        }

        /* If buf is non-NULL here, then the buffer cannot be deallocated until
         * we turn interrupts on again. This is because free_timestamp_buffer()
         * indirectly causes TLB invalidations due to modifications of the
         * kernel address space, namely via vfree() in free_ft_buffer(), which
         * cannot be processed until we turn on interrupts again.
         */

        if (buf &&
            (only_single_writer /* resolved during function inlining */
             ? ft_buffer_start_single_write(buf, (void**) &ts)
             : ft_buffer_start_write(buf, (void**) &ts))) {
                ts->event = event;
                ts->seq_no = seq_no;

                ts->task_type = type;
                ts->pid = pid_fragment;

                ts->cpu = cpu;

                if (record_irq) {
                        if (local_cpu)
                                irq_count = cpu_get_and_clear_irq_fired();
                        else
                                irq_count = get_and_clear_irq_fired_for_cpu(cpu);
                }

                save_irq_flags(ts, irq_count - hide_irq);

                if (record_timestamp)
                        timestamp = ft_timestamp();
                if (record_timestamp == RECORD_OFFSET_TIMESTAMP)
                        timestamp += cycle_offset[smp_processor_id()][cpu];

                ts->timestamp = timestamp;
                ft_buffer_finish_write(buf, ts);
        }

        local_irq_restore(flags);
}


static inline void write_cpu_timestamp(
        uint8_t event,
        uint8_t type,
        uint16_t pid_fragment,
        unsigned int irq_count,
        int record_irq,
        int hide_irq,
        uint64_t timestamp,
        int record_timestamp)
{
        __write_record(event, type,
                       pid_fragment,
                       irq_count, record_irq, hide_irq,
                       timestamp, record_timestamp,
                       1 /* only_single_writer */,
                       1 /* is_cpu_timestamp */,
                       1 /* local_cpu */,
                       0xff /* other_cpu */);
}

static inline void save_msg_timestamp(
        uint8_t event,
        int hide_irq)
{
        struct task_struct *t = current;
        __write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
                       t->pid,
                       0, LOCAL_IRQ_COUNT, hide_irq,
                       0, RECORD_LOCAL_TIMESTAMP,
                       0 /* only_single_writer */,
                       0 /* is_cpu_timestamp */,
                       1 /* local_cpu */,
                       0xff /* other_cpu */);
}

static inline void save_remote_msg_timestamp(
        uint8_t event,
        uint8_t remote_cpu)
{
        struct task_struct *t = current;
        __write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
                       t->pid,
                       0, REMOTE_IRQ_COUNT, 0,
                       0, RECORD_OFFSET_TIMESTAMP,
                       0 /* only_single_writer */,
                       0 /* is_cpu_timestamp */,
                       0 /* local_cpu */,
                       remote_cpu);
}

feather_callback void save_cpu_timestamp_def(unsigned long event,
                                             unsigned long type)
{
        write_cpu_timestamp(event, type,
                            current->pid,
                            0, LOCAL_IRQ_COUNT, 0,
                            0, RECORD_LOCAL_TIMESTAMP);
}

feather_callback void save_cpu_timestamp_task(unsigned long event,
                                              unsigned long t_ptr)
{
        struct task_struct *t = (struct task_struct *) t_ptr;
        int rt = is_realtime(t);

        write_cpu_timestamp(event, rt ? TSK_RT : TSK_BE,
                            t->pid,
                            0, LOCAL_IRQ_COUNT, 0,
                            0, RECORD_LOCAL_TIMESTAMP);
}

/* fake timestamp to user-reported time */
feather_callback void save_cpu_timestamp_time(unsigned long event,
                                              unsigned long ptr)
{
        uint64_t* time = (uint64_t*) ptr;

        write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
                            current->pid,
                            0, LOCAL_IRQ_COUNT, 0,
                            *time, DO_NOT_RECORD_TIMESTAMP);
}

/* Record user-reported IRQ count */
feather_callback void save_cpu_timestamp_irq(unsigned long event,
                                             unsigned long irq_counter_ptr)
{
        uint64_t* irqs = (uint64_t*) irq_counter_ptr;

        write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
                            current->pid,
                            *irqs, NO_IRQ_COUNT, 0,
                            0, RECORD_LOCAL_TIMESTAMP);
}

/* Record to remote trace buffer */
feather_callback void msg_sent_to(unsigned long event, unsigned long to)
{
        save_remote_msg_timestamp(event, to);
}

/* Record to local trace buffer */
feather_callback void msg_sent_local(unsigned long event)
{
        save_msg_timestamp(event, 0);
}

/* Suppresses one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is
 * called from within an interrupt that is expected. */
feather_callback void msg_received_local(unsigned long event)
{
        save_msg_timestamp(event, 1);
}

/* Record to remote trace buffer */
feather_callback void msg_received_from(unsigned long event, unsigned long from)
{
        save_remote_msg_timestamp(event, from);
}

static void __add_timestamp_user(struct timestamp *pre_recorded)
{
        unsigned long flags;
        unsigned int seq_no;
        struct timestamp *ts;
        struct ft_buffer* buf;
        int cpu;

        local_irq_save(flags);

        cpu = smp_processor_id();
        buf = cpu_trace_ts_buf(cpu);

        seq_no = __this_cpu_inc_return(cpu_ts_seq_no);
        if (buf && ft_buffer_start_single_write(buf, (void**) &ts)) {
                *ts = *pre_recorded;
                ts->seq_no = seq_no;
                ts->cpu = raw_smp_processor_id();
                save_irq_flags(ts, get_and_clear_irq_fired());
                ft_buffer_finish_write(buf, ts);
        }

        local_irq_restore(flags);
}

/******************************************************************************/
/*                             DEVICE FILE DRIVER                             */
/******************************************************************************/

struct calibrate_info {
        atomic_t ready;

        uint64_t cycle_count;
};

static void calibrate_helper(void *_info)
{
        struct calibrate_info *info = _info;
        /* check in with master */
        atomic_inc(&info->ready);

        /* wait for master to signal start */
        while (atomic_read(&info->ready))
                cpu_relax();

        /* report time stamp */
        info->cycle_count = ft_timestamp();

        /* tell master that we are done */
        atomic_inc(&info->ready);
}


static int64_t calibrate_cpu(int cpu)
{
        uint64_t cycles;
        struct calibrate_info info;
        unsigned long flags;
        int64_t delta;

        atomic_set(&info.ready, 0);
        info.cycle_count = 0;
        smp_wmb();

        smp_call_function_single(cpu, calibrate_helper, &info, 0);

        /* wait for helper to become active */
        while (!atomic_read(&info.ready))
                cpu_relax();

        /* avoid interrupt interference */
        local_irq_save(flags);

        /* take measurement */
        atomic_set(&info.ready, 0);
        smp_wmb();
        cycles = ft_timestamp();

        /* wait for helper reading */
        while (!atomic_read(&info.ready))
                cpu_relax();

        /* positive offset: the other guy is ahead of us */
        delta  = (int64_t) info.cycle_count;
        delta -= (int64_t) cycles;

        local_irq_restore(flags);

        return delta;
}

#define NUM_SAMPLES 10

static long calibrate_tsc_offsets(struct ftdev* ftdev, unsigned int idx,
                                  unsigned long uarg)
{
        int cpu, self, i;
        int64_t delta, sample;

        preempt_disable();
        self = smp_processor_id();

        if (uarg)
                printk(KERN_INFO "Feather-Trace: determining TSC offsets for P%d\n", self);

        for_each_online_cpu(cpu)
                if (cpu != self) {
                        delta = calibrate_cpu(cpu);
                        for (i = 1; i < NUM_SAMPLES; i++) {
                                sample = calibrate_cpu(cpu);
                                delta = sample < delta ? sample : delta;
                        }

                        cycle_offset[self][cpu] = delta;

                        if (uarg)
                                printk(KERN_INFO "Feather-Trace: TSC offset for P%d->P%d is %lld cycles.\n",
                                       self, cpu, cycle_offset[self][cpu]);
                }

        preempt_enable();
        return 0;
}

#define NO_TIMESTAMPS (2 << CONFIG_SCHED_OVERHEAD_TRACE_SHIFT)

static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
{
        unsigned int count = NO_TIMESTAMPS;

        /* An overhead-tracing timestamp should be exactly 16 bytes long. */
        BUILD_BUG_ON(sizeof(struct timestamp) != 16);

        while (count && !ftdev->minor[idx].buf) {
                printk("time stamp buffer: trying to allocate %u time stamps for minor=%u.\n", count, idx);
                ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
                count /= 2;
        }
        return ftdev->minor[idx].buf ? 0 : -ENOMEM;
}

static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
{
        /* Grab the buffer pointer before clearing it; freeing
         * ftdev->minor[idx].buf after setting it to NULL would free
         * nothing and leak the buffer. */
        struct ft_buffer* buf = ftdev->minor[idx].buf;

        ftdev->minor[idx].buf = NULL;
        /* Make sure all cores have actually seen buf == NULL before
         * yanking out the mappings from underneath them. */
        smp_wmb();
        free_ft_buffer(buf);
}

static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
                                         const char __user *from)
{
        ssize_t consumed = 0;
        struct timestamp ts;

        /* don't give us partial timestamps */
        if (len % sizeof(ts))
                return -EINVAL;

        while (len >= sizeof(ts)) {
                if (copy_from_user(&ts, from, sizeof(ts))) {
                        consumed = -EFAULT;
                        goto out;
                }
                len      -= sizeof(ts);
                from     += sizeof(ts);
                consumed += sizeof(ts);

                /* Note: this always adds to the buffer of the CPU-local
                 * device, not necessarily to the device that the system call
                 * was invoked on. This is admittedly a bit ugly, but requiring
                 * tasks to only write to the appropriate device would make
                 * tracing from userspace under global and clustered scheduling
                 * exceedingly difficult. Writing to remote buffers would
                 * require giving up ft_buffer_start_single_write(), which we
                 * want to keep using to reduce the number of atomic ops in the
                 * common case (which is the recording of CPU-local scheduling
                 * overheads).
                 */
                __add_timestamp_user(&ts);
        }

out:
        return consumed;
}

static int __init init_cpu_ft_overhead_trace(void)
{
        int err, cpu;

        printk("Initializing Feather-Trace per-cpu overhead tracing device.\n");
        err = ftdev_init(&cpu_overhead_dev, THIS_MODULE,
                         num_online_cpus(), "ft_cpu_trace");
        if (err)
                goto err_out;

        cpu_overhead_dev.alloc = alloc_timestamp_buffer;
        cpu_overhead_dev.free  = free_timestamp_buffer;
        cpu_overhead_dev.write = write_timestamp_from_user;

        err = register_ftdev(&cpu_overhead_dev);
        if (err)
                goto err_dealloc;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                per_cpu(cpu_ts_seq_no, cpu) = 0;
        }

        return 0;

err_dealloc:
        ftdev_exit(&cpu_overhead_dev);
err_out:
        printk(KERN_WARNING "Could not register per-cpu ft_trace device.\n");
        return err;
}

static int __init init_msg_ft_overhead_trace(void)
{
        int err, cpu;

        printk("Initializing Feather-Trace per-cpu message overhead tracing device.\n");
        err = ftdev_init(&msg_overhead_dev, THIS_MODULE,
                         num_online_cpus(), "ft_msg_trace");
        if (err)
                goto err_out;

        msg_overhead_dev.alloc = alloc_timestamp_buffer;
        msg_overhead_dev.free  = free_timestamp_buffer;
        msg_overhead_dev.calibrate = calibrate_tsc_offsets;

        err = register_ftdev(&msg_overhead_dev);
        if (err)
                goto err_dealloc;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                atomic_set(&per_cpu(msg_ts_seq_no, cpu), 0);
        }

        return 0;

err_dealloc:
        ftdev_exit(&msg_overhead_dev);
err_out:
        printk(KERN_WARNING "Could not register message ft_trace device.\n");
        return err;
}


static int __init init_ft_overhead_trace(void)
{
        int err, i, j;

        for (i = 0; i < NR_CPUS; i++)
                for (j = 0; j < NR_CPUS; j++)
                        cycle_offset[i][j] = 0;

        err = init_cpu_ft_overhead_trace();
        if (err)
                return err;

        err = init_msg_ft_overhead_trace();
        if (err)
                ftdev_exit(&cpu_overhead_dev);
        return err;
}

static void __exit exit_ft_overhead_trace(void)
{
        ftdev_exit(&cpu_overhead_dev);
        ftdev_exit(&msg_overhead_dev);
}

module_init(init_ft_overhead_trace);
module_exit(exit_ft_overhead_trace);
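
For context beyond the patch itself: write_timestamp_from_user() above accepts only whole 16-byte struct timestamp records (anything else returns -EINVAL), so a userspace tool could inject pre-recorded timestamps roughly as in the minimal sketch below. The /dev/litmus/ft_cpu_trace0 device path and the exact field layout are assumptions not shown in this patch; only the record size and the write semantics come from the code above.

/* Hypothetical userspace sketch: injecting one pre-recorded timestamp into
 * the per-CPU overhead trace device registered by this patch. */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        uint8_t record[16];     /* one struct timestamp, exactly 16 bytes */
        int fd;

        fd = open("/dev/litmus/ft_cpu_trace0", O_WRONLY);  /* assumed path */
        if (fd < 0)
                return 1;

        memset(record, 0, sizeof(record));
        /* ... fill in event, pid, and timestamp fields per litmus/trace.h ... */

        /* The write length must be a multiple of 16 bytes, or the driver
         * rejects it with -EINVAL. */
        if (write(fd, record, sizeof(record)) != (ssize_t) sizeof(record))
                return 1;

        return close(fd);
}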