-rw-r--r--	litmus/sched_trace.c	443
1 file changed, 171 insertions, 272 deletions
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
index 5befc1e21b1f..4c1ea7eab635 100644
--- a/litmus/sched_trace.c
+++ b/litmus/sched_trace.c
@@ -1,147 +1,117 @@
 /*
  * sched_trace.c -- record scheduling events to a byte stream.
- *
- * TODO: Move ring buffer to a lockfree implementation.
  */
-
 #include <linux/spinlock.h>
+#include <linux/semaphore.h>
+
 #include <linux/fs.h>
 #include <linux/cdev.h>
-#include <linux/semaphore.h>
 #include <asm/uaccess.h>
 #include <linux/module.h>
 #include <linux/sysrq.h>
 
+#include <linux/kfifo.h>
+
 #include <litmus/sched_trace.h>
 #include <litmus/litmus.h>
 
-typedef struct {
-	/* guard read and write pointers */
-	spinlock_t lock;
-	/* guard against concurrent freeing of buffer */
-	rwlock_t del_lock;
+/* Allocate a buffer of about 32k per CPU */
+#define LITMUS_TRACE_BUF_PAGES 8
+#define LITMUS_TRACE_BUF_SIZE (PAGE_SIZE * LITMUS_TRACE_BUF_PAGES * NR_CPUS)
 
-	/* memory allocated for ring buffer */
-	unsigned long order;
-	char* buf;
-	char* end;
+/* Max length of one read from the buffer */
+#define MAX_READ_LEN (64 * 1024)
 
-	/* Read/write pointer. May not cross.
-	 * They point to the position of next write and
-	 * last read.
-	 */
-	char* writep;
-	char* readp;
+/* Max length for one write --- from kernel --- to the buffer */
+#define MSG_SIZE 255
+
+/*
+ * Major number for the tracing char device.
+ * the major numbers are from the unassigned/local use block
+ */
+#define LOG_MAJOR 251
+
+/* Inner ring buffer structure */
+typedef struct {
+	rwlock_t del_lock;
 
+	/* the buffer */
+	struct kfifo *kfifo;
 } ring_buffer_t;
 
-#define EMPTY_RING_BUFFER { \
-	.lock = SPIN_LOCK_UNLOCKED, \
-	.del_lock = RW_LOCK_UNLOCKED, \
-	.buf = NULL, \
-	.end = NULL, \
-	.writep = NULL, \
-	.readp = NULL \
-}
+/* Main buffer structure */
+typedef struct {
+	ring_buffer_t buf;
+	atomic_t reader_cnt;
+	struct semaphore reader_mutex;
+} trace_buffer_t;
+
 
+/*
+ * Inner buffer management functions
+ */
 void rb_init(ring_buffer_t* buf)
 {
-	*buf = (ring_buffer_t) EMPTY_RING_BUFFER;
+	rwlock_init(&buf->del_lock);
+	buf->kfifo = NULL;
 }
 
-int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
+int rb_alloc_buf(ring_buffer_t* buf, unsigned int size)
 {
 	unsigned long flags;
-	int error = 0;
-	char *mem;
 
-	/* do memory allocation while not atomic */
-	mem = (char *) __get_free_pages(GFP_KERNEL, order);
-	if (!mem)
-		return -ENOMEM;
 	write_lock_irqsave(&buf->del_lock, flags);
-	BUG_ON(buf->buf);
-	buf->buf = mem;
-	buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
-	memset(buf->buf, 0xff, buf->end - buf->buf);
-	buf->order = order;
-	buf->writep = buf->buf + 1;
-	buf->readp = buf->buf;
+
+	buf->kfifo = kfifo_alloc(size, GFP_ATOMIC, NULL);
+
 	write_unlock_irqrestore(&buf->del_lock, flags);
-	return error;
+
+	if (IS_ERR(buf->kfifo)) {
+		printk(KERN_ERR "kfifo_alloc failed\n");
+		return PTR_ERR(buf->kfifo);
+	}
+
+	return 0;
 }
 
 int rb_free_buf(ring_buffer_t* buf)
 {
 	unsigned long flags;
-	int error = 0;
+
 	write_lock_irqsave(&buf->del_lock, flags);
-	BUG_ON(!buf->buf);
-	free_pages((unsigned long) buf->buf, buf->order);
-	buf->buf = NULL;
-	buf->end = NULL;
-	buf->writep = NULL;
-	buf->readp = NULL;
+
+	BUG_ON(!buf->kfifo);
+	kfifo_free(buf->kfifo);
+
+	buf->kfifo = NULL;
+
 	write_unlock_irqrestore(&buf->del_lock, flags);
-	return error;
+
+	return 0;
 }
 
-/* Assumption: concurrent writes are serialized externally
+/*
+ * Assumption: concurrent writes are serialized externally
  *
  * Will only succeed if there is enough space for all len bytes.
  */
 int rb_put(ring_buffer_t* buf, char* mem, size_t len)
 {
 	unsigned long flags;
-	char* r , *w;
 	int error = 0;
+
 	read_lock_irqsave(&buf->del_lock, flags);
-	if (!buf->buf) {
+
+	if (!buf->kfifo) {
 		error = -ENODEV;
 		goto out;
 	}
-	spin_lock(&buf->lock);
-	r = buf->readp;
-	w = buf->writep;
-	spin_unlock(&buf->lock);
-	if (r < w && buf->end - w >= len - 1) {
-		/* easy case: there is enough space in the buffer
-		 * to write it in one continous chunk*/
-		memcpy(w, mem, len);
-		w += len;
-		if (w > buf->end)
-			/* special case: fit exactly into buffer
-			 * w is now buf->end + 1
-			 */
-			w = buf->buf;
-	} else if (w < r && r - w >= len) { /* >= len because may not cross */
-		/* we are constrained by the read pointer but we there
-		 * is enough space
-		 */
-		memcpy(w, mem, len);
-		w += len;
-	} else if (r <= w && buf->end - w < len - 1) {
-		/* the wrap around case: there may or may not be space */
-		if ((buf->end - w) + (r - buf->buf) >= len - 1) {
-			/* copy chunk that fits at the end */
-			memcpy(w, mem, buf->end - w + 1);
-			mem += buf->end - w + 1;
-			len -= (buf->end - w + 1);
-			w = buf->buf;
-			/* copy the rest */
-			memcpy(w, mem, len);
-			w += len;
-		}
-		else
-			error = -ENOMEM;
-	} else {
+
+	if ((__kfifo_put(buf->kfifo, mem, len)) < len) {
 		error = -ENOMEM;
+		goto out;
 	}
-	if (!error) {
-		spin_lock(&buf->lock);
-		buf->writep = w;
-		spin_unlock(&buf->lock);
-	}
+
  out:
 	read_unlock_irqrestore(&buf->del_lock, flags);
 	return error;
@@ -151,143 +121,107 @@ int rb_put(ring_buffer_t* buf, char* mem, size_t len)
 int rb_get(ring_buffer_t* buf, char* mem, size_t len)
 {
 	unsigned long flags;
-	char* r , *w;
 	int error = 0;
+
 	read_lock_irqsave(&buf->del_lock, flags);
-	if (!buf->buf) {
+	if (!buf->kfifo) {
 		error = -ENODEV;
 		goto out;
 	}
-	spin_lock(&buf->lock);
-	r = buf->readp;
-	w = buf->writep;
-	spin_unlock(&buf->lock);
-
-	if (w <= r && buf->end - r >= len) {
-		/* easy case: there is enough data in the buffer
-		 * to get it in one chunk*/
-		memcpy(mem, r + 1, len);
-		r += len;
-		error = len;
-
-	} else if (r + 1 < w && w - r - 1 >= len) {
-		/* we are constrained by the write pointer but
-		 * there is enough data
-		 */
-		memcpy(mem, r + 1, len);
-		r += len;
-		error = len;
-
-	} else if (r + 1 < w && w - r - 1 < len) {
-		/* we are constrained by the write pointer and there
-		 * there is not enough data
-		 */
-		memcpy(mem, r + 1, w - r - 1);
-		error = w - r - 1;
-		r += w - r - 1;
-
-	} else if (w <= r && buf->end - r < len) {
-		/* the wrap around case: there may or may not be enough data
-		 * first let's get what is available
-		 */
-		memcpy(mem, r + 1, buf->end - r);
-		error += (buf->end - r);
-		mem += (buf->end - r);
-		len -= (buf->end - r);
-		r += (buf->end - r);
-
-		if (w > buf->buf) {
-			/* there is more to get */
-			r = buf->buf - 1;
-			if (w - r >= len) {
-				/* plenty */
-				memcpy(mem, r + 1, len);
-				error += len;
-				r += len;
-			} else {
-				memcpy(mem, r + 1, w - r - 1);
-				error += w - r - 1;
-				r += w - r - 1;
-			}
-		}
-	} /* nothing available */
 
-	if (error > 0) {
-		spin_lock(&buf->lock);
-		buf->readp = r;
-		spin_unlock(&buf->lock);
-	}
+	error = __kfifo_get(buf->kfifo, (unsigned char*)mem, len);
+
  out:
 	read_unlock_irqrestore(&buf->del_lock, flags);
 	return error;
 }
 
+/*
+ * Device Driver management
+ */
+static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
+static trace_buffer_t log_buffer;
 
+static void init_log_buffer(void)
+{
+	rb_init(&log_buffer.buf);
+	atomic_set(&log_buffer.reader_cnt, 0);
+	init_MUTEX(&log_buffer.reader_mutex);
+}
 
-/******************************************************************************/
-/*                          DEVICE FILE DRIVER                                */
-/******************************************************************************/
-
-
+static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
 
-/* Allocate a buffer of about 1 MB per CPU.
+/*
+ * sched_trace_log_message - Write to the trace buffer (log_buffer)
  *
+ * This is the only function accessing the log_buffer from inside the
+ * kernel for writing.
+ * Concurrent access to sched_trace_log_message must be serialized using
+ * log_buffer_lock
+ * The maximum length of a formatted message is 255
  */
-#define BUFFER_ORDER 8
-
-typedef struct {
-	ring_buffer_t buf;
-	atomic_t reader_cnt;
-	struct semaphore reader_mutex;
-} trace_buffer_t;
-
-
-/* This does not initialize the semaphore!! */
+void sched_trace_log_message(const char* fmt, ...)
+{
+	unsigned long flags;
+	va_list args;
+	size_t len;
+	char* buf;
 
-#define EMPTY_TRACE_BUFFER \
-	{ .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
+	va_start(args, fmt);
+	local_irq_save(flags);
 
-static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
-static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
+	/* format message */
+	buf = __get_cpu_var(fmt_buffer);
+	len = vscnprintf(buf, MSG_SIZE, fmt, args);
 
-static void init_log_buffer(void)
-{
-	/* only initialize the mutex, the rest was initialized as part
-	 * of the static initialization macro
+	spin_lock(&log_buffer_lock);
+	/* Don't copy the trailing null byte, we don't want null bytes
+	 * in a text file.
 	 */
-	init_MUTEX(&log_buffer.reader_mutex);
+	rb_put(&log_buffer.buf, buf, len);
+	spin_unlock(&log_buffer_lock);
+
+	local_irq_restore(flags);
+	va_end(args);
 }
 
+/*
+ * log_read - Read the trace buffer
+ *
+ * This function is called as a file operation from userspace.
+ * Readers can sleep. Access is serialized through reader_mutex
+ */
 static ssize_t log_read(struct file *filp, char __user *to, size_t len,
 		loff_t *f_pos)
 {
 	/* we ignore f_pos, this is strictly sequential */
 
 	ssize_t error = -EINVAL;
 	char* mem;
-	trace_buffer_t *buf = filp->private_data;
+	trace_buffer_t *tbuf = filp->private_data;
 
-	if (down_interruptible(&buf->reader_mutex)) {
+	if (down_interruptible(&tbuf->reader_mutex)) {
 		error = -ERESTARTSYS;
 		goto out;
 	}
 
-	if (len > 64 * 1024)
-		len = 64 * 1024;
+	if (len > MAX_READ_LEN)
+		len = MAX_READ_LEN;
+
 	mem = kmalloc(len, GFP_KERNEL);
 	if (!mem) {
 		error = -ENOMEM;
 		goto out_unlock;
 	}
 
-	error = rb_get(&buf->buf, mem, len);
+	error = rb_get(&tbuf->buf, mem, len);
 	while (!error) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(110);
 		if (signal_pending(current))
 			error = -ERESTARTSYS;
 		else
-			error = rb_get(&buf->buf, mem, len);
+			error = rb_get(&tbuf->buf, mem, len);
 	}
 
 	if (error > 0 && copy_to_user(to, mem, error))
@@ -295,89 +229,87 @@ static ssize_t log_read(struct file *filp, char __user *to, size_t len,
 
 	kfree(mem);
  out_unlock:
-	up(&buf->reader_mutex);
+	up(&tbuf->reader_mutex);
  out:
 	return error;
 }
 
-/* defined in kernel/printk.c */
+/*
+ * Enable redirection of printk() messages to the trace buffer.
+ * Defined in kernel/printk.c
+ */
 extern int trace_override;
 extern int trace_recurse;
 
-/* log_open - open the global log message ring buffer.
+/*
+ * log_open - open the global log message ring buffer.
  */
 static int log_open(struct inode *in, struct file *filp)
 {
 	int error = -EINVAL;
-	trace_buffer_t* buf;
+	trace_buffer_t* tbuf;
 
-	buf = &log_buffer;
+	tbuf = &log_buffer;
 
-	if (down_interruptible(&buf->reader_mutex)) {
+	if (down_interruptible(&tbuf->reader_mutex)) {
 		error = -ERESTARTSYS;
 		goto out;
 	}
 
 	/* first open must allocate buffers */
-	if (atomic_inc_return(&buf->reader_cnt) == 1) {
-		if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
+	if (atomic_inc_return(&tbuf->reader_cnt) == 1) {
+		if ((error = rb_alloc_buf(&tbuf->buf, LITMUS_TRACE_BUF_SIZE)))
 		{
-			atomic_dec(&buf->reader_cnt);
+			atomic_dec(&tbuf->reader_cnt);
 			goto out_unlock;
 		}
 	}
 
 	error = 0;
-	filp->private_data = buf;
-	printk(KERN_DEBUG "sched_trace buf: from 0x%p to 0x%p length: %x\n",
-	       buf->buf.buf, buf->buf.end,
-	       (unsigned int) (buf->buf.end - buf->buf.buf));
+	filp->private_data = tbuf;
+
+	printk(KERN_DEBUG
+	       "sched_trace kfifo at 0x%p with buffer starting at: 0x%p\n",
+	       tbuf->buf.kfifo, &((tbuf->buf.kfifo)->buffer));
 
 	/* override printk() */
 	trace_override++;
 
  out_unlock:
-	up(&buf->reader_mutex);
+	up(&tbuf->reader_mutex);
  out:
 	return error;
 }
 
 static int log_release(struct inode *in, struct file *filp)
 {
 	int error = -EINVAL;
-	trace_buffer_t* buf = filp->private_data;
+	trace_buffer_t* tbuf = filp->private_data;
 
 	BUG_ON(!filp->private_data);
 
-	if (down_interruptible(&buf->reader_mutex)) {
+	if (down_interruptible(&tbuf->reader_mutex)) {
 		error = -ERESTARTSYS;
 		goto out;
 	}
 
 	/* last release must deallocate buffers */
-	if (atomic_dec_return(&buf->reader_cnt) == 0) {
-		error = rb_free_buf(&buf->buf);
+	if (atomic_dec_return(&tbuf->reader_cnt) == 0) {
+		error = rb_free_buf(&tbuf->buf);
 	}
 
 	/* release printk() overriding */
 	trace_override--;
 
-	up(&buf->reader_mutex);
+	printk(KERN_DEBUG "sched_trace kfifo released\n");
+
+	up(&tbuf->reader_mutex);
  out:
 	return error;
 }
 
-/******************************************************************************/
-/*                         Device Registration                                */
-/******************************************************************************/
-
-/* the major numbes are from the unassigned/local use block
- *
- * This should be converted to dynamic allocation at some point...
- */
-#define LOG_MAJOR 251
-
-/* log_fops - The file operations for accessing the global LITMUS log message
+/*
+ * log_fops - The file operations for accessing the global LITMUS log message
  * buffer.
  *
  * Except for opening the device file it uses the same operations as trace_fops.
@@ -389,6 +321,9 @@ struct file_operations log_fops = {
 	.read = log_read,
 };
 
+/*
+ * Device registration
+ */
 static int __init register_buffer_dev(const char* name,
 				      struct file_operations* fops,
 				      int major, int count)
@@ -398,7 +333,7 @@ static int __init register_buffer_dev(const char* name,
 	int error = 0;
 
 	trace_dev = MKDEV(major, 0);
-	error = register_chrdev_region(trace_dev, count, name);
+	error = register_chrdev_region(trace_dev, count, name);
 	if (error)
 	{
 		printk(KERN_WARNING "sched trace: "
@@ -424,6 +359,21 @@ static int __init register_buffer_dev(const char* name,
 }
 
 #ifdef CONFIG_MAGIC_SYSRQ
+void dump_trace_buffer(int max)
+{
+	char line[80];
+	int len;
+	int count = 0;
+
+	/* potentially, but very unlikely race... */
+	trace_recurse = 1;
+	while ((max == 0 || count++ < max) &&
+	       (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) {
+		line[len] = '\0';
+		printk("%s", line);
+	}
+	trace_recurse = 0;
+}
 
 static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty)
 {
@@ -435,7 +385,6 @@ static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
 	.help_msg = "dump-trace-buffer(Y)",
 	.action_msg = "writing content of TRACE() buffer",
 };
-
 #endif
 
 static int __init init_sched_trace(void)
@@ -458,53 +407,3 @@ static int __init init_sched_trace(void)
 
 module_init(init_sched_trace);
 
-#define MSG_SIZE 255
-static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
-
-/* sched_trace_log_message - This is the only function that accesses the the
- * log buffer inside the kernel for writing.
- * Concurrent access to it is serialized via the
- * log_buffer_lock.
- *
- * The maximum length of a formatted message is 255.
- */
-void sched_trace_log_message(const char* fmt, ...)
-{
-	unsigned long flags;
-	va_list args;
-	size_t len;
-	char* buf;
-
-	va_start(args, fmt);
-	local_irq_save(flags);
-
-	/* format message */
-	buf = __get_cpu_var(fmt_buffer);
-	len = vscnprintf(buf, MSG_SIZE, fmt, args);
-
-	spin_lock(&log_buffer_lock);
-	/* Don't copy the trailing null byte, we don't want null bytes
-	 * in a text file.
-	 */
-	rb_put(&log_buffer.buf, buf, len);
-	spin_unlock(&log_buffer_lock);
-
-	local_irq_restore(flags);
-	va_end(args);
-}
-
-void dump_trace_buffer(int max)
-{
-	char line[80];
-	int len;
-	int count = 0;
-
-	/* potentially, but very unlikely race... */
-	trace_recurse = 1;
-	while ((max == 0 || count++ < max) &&
-	       (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) {
-		line[len] = '\0';
-		printk("%s", line);
-	}
-	trace_recurse = 0;
-}
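
For context, this patch targets the pre-2.6.33 kfifo API, in which kfifo_alloc() returns a struct kfifo pointer and the double-underscore variants __kfifo_put()/__kfifo_get() do no internal locking. That is why sched_trace_log_message() serializes writers with log_buffer_lock while rb_put()/rb_get() only guard against concurrent freeing via del_lock. A minimal, self-contained sketch of that API follows; the module name and messages are illustrative only, not part of this commit:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/kfifo.h>
#include <linux/err.h>

static struct kfifo *demo_fifo;

static int __init kfifo_demo_init(void)
{
	unsigned char out[16];
	unsigned int n;

	/* NULL spinlock: callers are responsible for serialization,
	 * just like rb_alloc_buf() passing NULL above. */
	demo_fifo = kfifo_alloc(4096, GFP_KERNEL, NULL);
	if (IS_ERR(demo_fifo))
		return PTR_ERR(demo_fifo);

	/* returns the number of bytes actually copied in;
	 * rb_put() treats a short count as -ENOMEM */
	n = __kfifo_put(demo_fifo, (unsigned char *) "hello", 5);
	printk(KERN_INFO "kfifo_demo: put %u bytes\n", n);

	/* returns the number of bytes actually copied out;
	 * rb_get() passes this count straight back to log_read() */
	n = __kfifo_get(demo_fifo, out, sizeof(out));
	printk(KERN_INFO "kfifo_demo: got %u bytes\n", n);

	return 0;
}

static void __exit kfifo_demo_exit(void)
{
	kfifo_free(demo_fifo);
}

module_init(kfifo_demo_init);
module_exit(kfifo_demo_exit);
MODULE_LICENSE("GPL");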
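
On the userspace side, log_read() ignores f_pos and blocks in 110-jiffy sleeps until bytes arrive, so a plain sequential reader suffices. A hypothetical reader, assuming the LOG_MAJOR device node is created as /dev/litmus/log (the node path is an assumption; it is set up outside this file):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* each read() returns at most MAX_READ_LEN bytes and blocks
	 * until the kernel has written something to the kfifo */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t) n, stdout);
	close(fd);
	return 0;
}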