author		Andrea Bastoni <bastoni@cs.unc.edu>	2009-12-17 21:26:50 -0500
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-29 17:12:27 -0400
commit		96979188007a0671d3f067d7edf144742d7433ee (patch)
tree		8b93dacea74499926cc4fcaa0879dbfe3ace9d7f /litmus/sched_trace.c
parent		cf3f4bd8db320f3f487d66bdec924e926f004787 (diff)
[ported from 2008.3] Add tracing support and hook up Litmus KConfig for x86
- fix requests for more than 2^11 pages (MAX_ORDER) to the system allocator
Still to be merged:
- feather-trace generic implementation
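
For reference, __get_free_pages() refuses allocation orders at or above the buddy allocator's MAX_ORDER limit, so a single oversized request cannot be satisfied in one call and has to be capped before it reaches the system allocator. A minimal sketch of such a guard (the helper name is hypothetical; the patch itself simply keeps buffer orders well below the limit):

        #include <linux/mm.h>
        #include <linux/errno.h>

        /* Hypothetical guard: refuse ring-buffer orders that the buddy
         * allocator cannot satisfy, instead of letting __get_free_pages()
         * fail later. */
        static inline int trace_order_ok(unsigned long order)
        {
                return order < MAX_ORDER ? 0 : -EINVAL;
        }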
Diffstat (limited to 'litmus/sched_trace.c')
-rw-r--r-- | litmus/sched_trace.c | 510
1 file changed, 510 insertions, 0 deletions
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 000000000000..5befc1e21b1f
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,510 @@
/*
 * sched_trace.c -- record scheduling events to a byte stream.
 *
 * TODO: Move ring buffer to a lockfree implementation.
 */

#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/semaphore.h>
#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/sysrq.h>

#include <litmus/sched_trace.h>
#include <litmus/litmus.h>

typedef struct {
        /* guard read and write pointers */
        spinlock_t lock;
        /* guard against concurrent freeing of buffer */
        rwlock_t del_lock;

        /* memory allocated for ring buffer */
        unsigned long order;
        char* buf;
        char* end;

        /* Read/write pointers. They may not cross:
         * writep points to the position of the next write,
         * readp to the last position read.
         */
        char* writep;
        char* readp;

} ring_buffer_t;
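/* Editor's note (not part of the original commit): immediately after
 * rb_alloc_buf() below, readp == buf and writep == buf + 1, i.e. the
 * buffer is empty. Valid data always occupies the bytes strictly after
 * readp and strictly before writep (wrapping at end), and because the
 * pointers may not cross, one byte is sacrificed: at most
 * PAGE_SIZE * 2^order - 1 bytes can ever be buffered.
 */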

#define EMPTY_RING_BUFFER { \
        .lock = SPIN_LOCK_UNLOCKED, \
        .del_lock = RW_LOCK_UNLOCKED, \
        .buf = NULL, \
        .end = NULL, \
        .writep = NULL, \
        .readp = NULL \
}

void rb_init(ring_buffer_t* buf)
{
        *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
}

int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
{
        unsigned long flags;
        int error = 0;
        char *mem;

        /* do memory allocation while not atomic */
        mem = (char *) __get_free_pages(GFP_KERNEL, order);
        if (!mem)
                return -ENOMEM;
        write_lock_irqsave(&buf->del_lock, flags);
        BUG_ON(buf->buf);
        buf->buf = mem;
        buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
        memset(buf->buf, 0xff, buf->end - buf->buf);
        buf->order = order;
        buf->writep = buf->buf + 1;
        buf->readp = buf->buf;
        write_unlock_irqrestore(&buf->del_lock, flags);
        return error;
}

int rb_free_buf(ring_buffer_t* buf)
{
        unsigned long flags;
        int error = 0;
        write_lock_irqsave(&buf->del_lock, flags);
        BUG_ON(!buf->buf);
        free_pages((unsigned long) buf->buf, buf->order);
        buf->buf = NULL;
        buf->end = NULL;
        buf->writep = NULL;
        buf->readp = NULL;
        write_unlock_irqrestore(&buf->del_lock, flags);
        return error;
}

/* Assumption: concurrent writes are serialized externally
 *
 * Will only succeed if there is enough space for all len bytes.
 */
int rb_put(ring_buffer_t* buf, char* mem, size_t len)
{
        unsigned long flags;
        char *r, *w;
        int error = 0;
        read_lock_irqsave(&buf->del_lock, flags);
        if (!buf->buf) {
                error = -ENODEV;
                goto out;
        }
        spin_lock(&buf->lock);
        r = buf->readp;
        w = buf->writep;
        spin_unlock(&buf->lock);
        if (r < w && buf->end - w >= len - 1) {
                /* easy case: there is enough space in the buffer
                 * to write it in one continuous chunk */
                memcpy(w, mem, len);
                w += len;
                if (w > buf->end)
                        /* special case: fit exactly into buffer
                         * w is now buf->end + 1
                         */
                        w = buf->buf;
        } else if (w < r && r - w >= len) { /* >= len because may not cross */
                /* we are constrained by the read pointer, but there
                 * is enough space
                 */
                memcpy(w, mem, len);
                w += len;
        } else if (r <= w && buf->end - w < len - 1) {
                /* the wrap-around case: there may or may not be space */
                if ((buf->end - w) + (r - buf->buf) >= len - 1) {
                        /* copy the chunk that fits at the end */
                        memcpy(w, mem, buf->end - w + 1);
                        mem += buf->end - w + 1;
                        len -= (buf->end - w + 1);
                        w = buf->buf;
                        /* copy the rest */
                        memcpy(w, mem, len);
                        w += len;
                } else
                        error = -ENOMEM;
        } else {
                error = -ENOMEM;
        }
        if (!error) {
                spin_lock(&buf->lock);
                buf->writep = w;
                spin_unlock(&buf->lock);
        }
out:
        read_unlock_irqrestore(&buf->del_lock, flags);
        return error;
}
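/* Editor's note (not part of the original commit): a worked example of the
 * wrap-around branch above. Assume a 16-byte buffer with buf = 0x00,
 * end = 0x0f, r = 0x08, w = 0x0c and len = 6:
 *   admission check: (end - w) + (r - buf) = 3 + 8 >= len - 1 = 5, ok
 *   tail copy:  end - w + 1 = 4 bytes of mem land at 0x0c..0x0f, w = buf
 *   head copy:  the remaining 2 bytes land at 0x00..0x01, w = 0x02
 * The result respects the invariant, since w = 0x02 stays below r = 0x08.
 */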

/* Assumption: concurrent reads are serialized externally */
int rb_get(ring_buffer_t* buf, char* mem, size_t len)
{
        unsigned long flags;
        char *r, *w;
        int error = 0;
        read_lock_irqsave(&buf->del_lock, flags);
        if (!buf->buf) {
                error = -ENODEV;
                goto out;
        }
        spin_lock(&buf->lock);
        r = buf->readp;
        w = buf->writep;
        spin_unlock(&buf->lock);

        if (w <= r && buf->end - r >= len) {
                /* easy case: there is enough data in the buffer
                 * to get it in one chunk */
                memcpy(mem, r + 1, len);
                r += len;
                error = len;

        } else if (r + 1 < w && w - r - 1 >= len) {
                /* we are constrained by the write pointer, but
                 * there is enough data
                 */
                memcpy(mem, r + 1, len);
                r += len;
                error = len;

        } else if (r + 1 < w && w - r - 1 < len) {
                /* we are constrained by the write pointer and
                 * there is not enough data
                 */
                memcpy(mem, r + 1, w - r - 1);
                error = w - r - 1;
                r += w - r - 1;

        } else if (w <= r && buf->end - r < len) {
                /* the wrap-around case: there may or may not be enough data.
                 * First, let's get what is available.
                 */
                memcpy(mem, r + 1, buf->end - r);
                error += (buf->end - r);
                mem += (buf->end - r);
                len -= (buf->end - r);
                r += (buf->end - r);

                if (w > buf->buf) {
                        /* there is more to get */
                        r = buf->buf - 1;
                        if (w - r >= len) {
                                /* plenty */
                                memcpy(mem, r + 1, len);
                                error += len;
                                r += len;
                        } else {
                                memcpy(mem, r + 1, w - r - 1);
                                error += w - r - 1;
                                r += w - r - 1;
                        }
                }
        } /* nothing available */

        if (error > 0) {
                spin_lock(&buf->lock);
                buf->readp = r;
                spin_unlock(&buf->lock);
        }
out:
        read_unlock_irqrestore(&buf->del_lock, flags);
        return error;
}
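/* Editor's note (not part of the original commit): a sketch of the
 * intended calling pattern for the ring-buffer API above. Writers and
 * readers must each be serialized externally; rb_put() is all-or-nothing,
 * while rb_get() returns however many bytes happened to be available.
 */
#if 0
static int rb_usage_sketch(void)
{
        ring_buffer_t rb;
        char out[64];
        int n;

        rb_init(&rb);
        if (rb_alloc_buf(&rb, 2))               /* 2^2 = 4 pages */
                return -ENOMEM;
        if (rb_put(&rb, "hello", 5))            /* -ENOMEM if it cannot fit */
                printk(KERN_WARNING "buffer full\n");
        n = rb_get(&rb, out, sizeof(out));      /* returns 5 here */
        rb_free_buf(&rb);
        return n;
}
#endif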


/******************************************************************************/
/*                           DEVICE FILE DRIVER                               */
/******************************************************************************/


/* Allocate a buffer of about 1 MB for the log
 * (2^8 pages * 4 KB/page = 1 MB with the default page size).
 */
#define BUFFER_ORDER 8

typedef struct {
        ring_buffer_t buf;
        atomic_t reader_cnt;
        struct semaphore reader_mutex;
} trace_buffer_t;


/* This does not initialize the semaphore!! */

#define EMPTY_TRACE_BUFFER \
        { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0) }

static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;

static void init_log_buffer(void)
{
        /* only initialize the mutex; the rest was initialized as part
         * of the static initialization macro
         */
        init_MUTEX(&log_buffer.reader_mutex);
}
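/* Editor's note (not part of the original commit): init_MUTEX() initializes
 * the semaphore to a count of 1, so reader_mutex acts as a sleeping mutex
 * serializing the open/read/release paths below.
 */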

static ssize_t log_read(struct file *filp, char __user *to, size_t len,
                        loff_t *f_pos)
{
        /* we ignore f_pos, this is strictly sequential */

        ssize_t error = -EINVAL;
        char* mem;
        trace_buffer_t *buf = filp->private_data;

        if (down_interruptible(&buf->reader_mutex)) {
                error = -ERESTARTSYS;
                goto out;
        }

        if (len > 64 * 1024)
                len = 64 * 1024;
        mem = kmalloc(len, GFP_KERNEL);
        if (!mem) {
                error = -ENOMEM;
                goto out_unlock;
        }

        error = rb_get(&buf->buf, mem, len);
        while (!error) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(110);
                if (signal_pending(current))
                        error = -ERESTARTSYS;
                else
                        error = rb_get(&buf->buf, mem, len);
        }

        if (error > 0 && copy_to_user(to, mem, error))
                error = -EFAULT;

        kfree(mem);
out_unlock:
        up(&buf->reader_mutex);
out:
        return error;
}
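/* Editor's note (not part of the original commit): log_read() polls rather
 * than blocking on a wait queue; schedule_timeout(110) sleeps for 110
 * jiffies, i.e. 110 ms at HZ = 1000 and 440 ms at HZ = 250.
 */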

/* defined in kernel/printk.c */
extern int trace_override;
extern int trace_recurse;

/* log_open - open the global log message ring buffer.
 */
static int log_open(struct inode *in, struct file *filp)
{
        int error = -EINVAL;
        trace_buffer_t* buf;

        buf = &log_buffer;

        if (down_interruptible(&buf->reader_mutex)) {
                error = -ERESTARTSYS;
                goto out;
        }

        /* first open must allocate buffers */
        if (atomic_inc_return(&buf->reader_cnt) == 1) {
                if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER))) {
                        atomic_dec(&buf->reader_cnt);
                        goto out_unlock;
                }
        }

        error = 0;
        filp->private_data = buf;
        printk(KERN_DEBUG "sched_trace buf: from 0x%p to 0x%p length: %x\n",
               buf->buf.buf, buf->buf.end,
               (unsigned int) (buf->buf.end - buf->buf.buf));

        /* override printk() */
        trace_override++;

out_unlock:
        up(&buf->reader_mutex);
out:
        return error;
}

static int log_release(struct inode *in, struct file *filp)
{
        int error = -EINVAL;
        trace_buffer_t* buf = filp->private_data;

        BUG_ON(!filp->private_data);

        if (down_interruptible(&buf->reader_mutex)) {
                error = -ERESTARTSYS;
                goto out;
        }

        /* last release must deallocate buffers */
        if (atomic_dec_return(&buf->reader_cnt) == 0) {
                error = rb_free_buf(&buf->buf);
        }

        /* release printk() overriding */
        trace_override--;

        up(&buf->reader_mutex);
out:
        return error;
}

/******************************************************************************/
/*                           Device Registration                              */
/******************************************************************************/

/* the major numbers are from the unassigned/local use block
 *
 * This should be converted to dynamic allocation at some point...
 */
#define LOG_MAJOR 251

/* log_fops - The file operations for accessing the global LITMUS log message
 *            buffer.
 *
 * Except for opening the device file it uses the same operations as trace_fops.
 */
struct file_operations log_fops = {
        .owner = THIS_MODULE,
        .open = log_open,
        .release = log_release,
        .read = log_read,
};
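/* Editor's note (not part of the original commit): with LOG_MAJOR = 251 and
 * a single minor, the log can be read from userspace after
 * "mknod /dev/litmus_log c 251 0" (the node name is arbitrary). A minimal
 * reader sketch:
 */
#if 0
/* userspace, not kernel code */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        char chunk[4096];
        ssize_t n;
        int fd = open("/dev/litmus_log", O_RDONLY);

        if (fd < 0)
                return 1;
        while ((n = read(fd, chunk, sizeof(chunk))) > 0)
                write(STDOUT_FILENO, chunk, n); /* dump log to stdout */
        close(fd);
        return 0;
}
#endif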

static int __init register_buffer_dev(const char* name,
                                      struct file_operations* fops,
                                      int major, int count)
{
        dev_t trace_dev;
        struct cdev *cdev;
        int error = 0;

        trace_dev = MKDEV(major, 0);
        error = register_chrdev_region(trace_dev, count, name);
        if (error) {
                printk(KERN_WARNING "sched trace: "
                       "Could not register major/minor number %d\n", major);
                return error;
        }
        cdev = cdev_alloc();
        if (!cdev) {
                printk(KERN_WARNING "sched trace: "
                       "Could not get a cdev for %s.\n", name);
                return -ENOMEM;
        }
        cdev->owner = THIS_MODULE;
        cdev->ops = fops;
        error = cdev_add(cdev, trace_dev, count);
        if (error) {
                printk(KERN_WARNING "sched trace: "
                       "add_cdev failed for %s.\n", name);
                return -ENOMEM;
        }
        return error;
}
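/* Editor's note (not part of the original commit): the error paths above
 * leak on failure -- a failed cdev_alloc() or cdev_add() leaves the chrdev
 * region registered, and a failed cdev_add() also leaks the cdev. This is
 * tolerable only because the function runs once from module init.
 */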

#ifdef CONFIG_MAGIC_SYSRQ

static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty)
{
        dump_trace_buffer(100);
}

static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
        .handler = sysrq_dump_trace_buffer,
        .help_msg = "dump-trace-buffer(Y)",
        .action_msg = "writing content of TRACE() buffer",
};

#endif

static int __init init_sched_trace(void)
{
        printk("Initializing TRACE() device\n");
        init_log_buffer();

#ifdef CONFIG_MAGIC_SYSRQ
        /* offer some debugging help */
        if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
                printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
        else
                printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
#endif

        return register_buffer_dev("litmus_log", &log_fops,
                                   LOG_MAJOR, 1);
}

module_init(init_sched_trace);

#define MSG_SIZE 255
static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);

/* sched_trace_log_message - This is the only function that accesses the
 *                           log buffer inside the kernel for writing.
 *                           Concurrent access to it is serialized via the
 *                           log_buffer_lock.
 *
 * The maximum length of a formatted message is 255.
 */
void sched_trace_log_message(const char* fmt, ...)
{
        unsigned long flags;
        va_list args;
        size_t len;
        char* buf;

        va_start(args, fmt);
        local_irq_save(flags);

        /* format message */
        buf = __get_cpu_var(fmt_buffer);
        len = vscnprintf(buf, MSG_SIZE, fmt, args);

        spin_lock(&log_buffer_lock);
        /* Don't copy the trailing null byte; we don't want null bytes
         * in a text file.
         */
        rb_put(&log_buffer.buf, buf, len);
        spin_unlock(&log_buffer_lock);

        local_irq_restore(flags);
        va_end(args);
}
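/* Editor's note (not part of the original commit): sched_trace_log_message()
 * takes an ordinary printf()-style format, e.g. (next_pid being a
 * hypothetical local variable):
 *
 *      sched_trace_log_message("P%d: picked %d\n",
 *                              smp_processor_id(), next_pid);
 *
 * Anything beyond MSG_SIZE - 1 = 254 formatted bytes is truncated by
 * vscnprintf() before it reaches the ring buffer.
 */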

void dump_trace_buffer(int max)
{
        char line[80];
        int len;
        int count = 0;

        /* potential, but very unlikely, race... */
        trace_recurse = 1;
        while ((max == 0 || count++ < max) &&
               (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) {
                line[len] = '\0';
                printk("%s", line);
        }
        trace_recurse = 0;
}