aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/hwlat_detector.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/hwlat_detector.c')
-rw-r--r--drivers/misc/hwlat_detector.c1210
1 files changed, 1210 insertions, 0 deletions
diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
new file mode 100644
index 000000000000..953783c983dd
--- /dev/null
+++ b/drivers/misc/hwlat_detector.c
@@ -0,0 +1,1210 @@
1/*
2 * hwlat_detector.c - A simple Hardware Latency detector.
3 *
4 * Use this module to detect large system latencies induced by the behavior of
5 * certain underlying system hardware or firmware, independent of Linux itself.
6 * The code was developed originally to detect the presence of SMIs on Intel
7 * and AMD systems, although there is no dependency upon x86 herein.
8 *
9 * The classical example usage of this module is in detecting the presence of
10 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
11 * somewhat special form of hardware interrupt spawned from earlier CPU debug
12 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
13 * LPC (or other device) to generate a special interrupt under certain
14 * circumstances, for example, upon expiration of a special SMI timer device,
15 * due to certain external thermal readings, on certain I/O address accesses,
16 * and other situations. An SMI hits a special CPU pin, triggers a special
17 * SMI mode (complete with special memory map), and the OS is unaware.
18 *
19 * Although certain hardware-inducing latencies are necessary (for example,
20 * a modern system often requires an SMI handler for correct thermal control
21 * and remote management) they can wreak havoc upon any OS-level performance
22 * guarantees toward low-latency, especially when the OS is not even made
23 * aware of the presence of these interrupts. For this reason, we need a
24 * somewhat brute force mechanism to detect these interrupts. In this case,
25 * we do it by hogging all of the CPU(s) for configurable timer intervals,
26 * sampling the built-in CPU timer, looking for discontiguous readings.
27 *
28 * WARNING: This implementation necessarily introduces latencies. Therefore,
29 * you should NEVER use this module in a production environment
30 * requiring any kind of low-latency performance guarantee(s).
31 *
32 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
33 *
34 * Includes useful feedback from Clark Williams <clark@redhat.com>
35 *
36 * This file is licensed under the terms of the GNU General Public
37 * License version 2. This program is licensed "as is" without any
38 * warranty of any kind, whether express or implied.
39 */
40
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/ring_buffer.h>
44#include <linux/stop_machine.h>
45#include <linux/time.h>
46#include <linux/hrtimer.h>
47#include <linux/kthread.h>
48#include <linux/debugfs.h>
49#include <linux/seq_file.h>
50#include <linux/uaccess.h>
51#include <linux/version.h>
52#include <linux/delay.h>
53
#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
#define U64STR_SIZE 22 /* 20 digits max */

#define VERSION "1.0.0"
#define BANNER "hwlat_detector: "
#define DRVNAME "hwlat_detector"
#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
#define DEFAULT_LAT_THRESHOLD 10 /* 10us */

/* Module metadata */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
MODULE_DESCRIPTION("A simple hardware latency detector");
MODULE_VERSION(VERSION);

/* Module parameters (settable at insmod time only; perm bits are 0) */

static int debug;
static int enabled;
static int threshold;

module_param(debug, int, 0); /* enable debug */
module_param(enabled, int, 0); /* enable detector */
module_param(threshold, int, 0); /* latency threshold */

/* Buffering and sampling */

static struct ring_buffer *ring_buffer; /* sample buffer */
static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
static unsigned long buf_size = BUF_SIZE_DEFAULT;
static struct task_struct *kthread; /* sampling thread */

/* DebugFS filesystem entries (created under the driver's debugfs dir) */

static struct dentry *debug_dir; /* debugfs directory */
static struct dentry *debug_max; /* maximum TSC delta */
static struct dentry *debug_count; /* total detect count */
static struct dentry *debug_sample_width; /* sample width us */
static struct dentry *debug_sample_window; /* sample window us */
static struct dentry *debug_sample; /* raw samples us */
static struct dentry *debug_threshold; /* threshold us */
static struct dentry *debug_enable; /* enable/disable */
99
/* Individual samples and global state */

struct sample; /* latency sample */
struct data; /* Global state */

/* Sampling functions */
static int __buffer_add_sample(struct sample *sample);
static struct sample *buffer_get_sample(struct sample *sample);
static int get_sample(void *unused);

/* Threading and state */
static int kthread_fn(void *unused);
static int start_kthread(void);
static int stop_kthread(void);
static void __reset_stats(void);
static int init_stats(void);

/* Debugfs interface */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry);
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry);
static int debug_sample_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static int debug_sample_release(struct inode *inode, struct file *filp);
static int debug_enable_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static ssize_t debug_enable_fwrite(struct file *file,
				   const char __user *user_buffer,
				   size_t user_size, loff_t *offset);

/* Initialization functions */
static int init_debugfs(void);
static void free_debugfs(void);
static int detector_init(void);
static void detector_exit(void);

/* Individual latency samples are stored here when detected and packed into
 * the ring_buffer circular buffer, where they are overwritten when
 * more than buf_size/sizeof(sample) samples are received. */
struct sample {
	u64 seqnum; /* unique sequence */
	u64 duration; /* ktime delta */
	struct timespec timestamp; /* wall time */
};

/* keep the global state somewhere. Mostly used under stop_machine. */
static struct data {

	struct mutex lock; /* protect changes */

	u64 count; /* total since reset */
	u64 max_sample; /* max hardware latency */
	u64 threshold; /* sample threshold level */

	u64 sample_window; /* total sampling window (on+off) */
	u64 sample_width; /* active sampling portion of window */

	atomic_t sample_open; /* whether the sample file is open */

	wait_queue_head_t wq; /* waitqueue for new sample values */

} data;
165
166/**
167 * __buffer_add_sample - add a new latency sample recording to the ring buffer
168 * @sample: The new latency sample value
169 *
170 * This receives a new latency sample and records it in a global ring buffer.
171 * No additional locking is used in this case - suited for stop_machine use.
172 */
173static int __buffer_add_sample(struct sample *sample)
174{
175 return ring_buffer_write(ring_buffer,
176 sizeof(struct sample), sample);
177}
178
179/**
180 * buffer_get_sample - remove a hardware latency sample from the ring buffer
181 * @sample: Pre-allocated storage for the sample
182 *
183 * This retrieves a hardware latency sample from the global circular buffer
184 */
185static struct sample *buffer_get_sample(struct sample *sample)
186{
187 struct ring_buffer_event *e = NULL;
188 struct sample *s = NULL;
189 unsigned int cpu = 0;
190
191 if (!sample)
192 return NULL;
193
194 mutex_lock(&ring_buffer_mutex);
195 for_each_online_cpu(cpu) {
196 e = ring_buffer_consume(ring_buffer, cpu, NULL);
197 if (e)
198 break;
199 }
200
201 if (e) {
202 s = ring_buffer_event_data(e);
203 memcpy(sample, s, sizeof(struct sample));
204 } else
205 sample = NULL;
206 mutex_unlock(&ring_buffer_mutex);
207
208 return sample;
209}
210
/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 * @unused: This is not used but is a part of the stop_machine API
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called under stop_machine, with data.lock held.
 *
 * Returns 0 on success, 1 if the clock appeared to run backwards (the
 * non-zero value is propagated back through stop_machine to the caller).
 */
static int get_sample(void *unused)
{
	ktime_t start, t1, t2;
	s64 diff, total = 0;
	u64 sample = 0;	/* highest single gap seen during this window */
	int ret = 1;	/* pessimistic default; cleared on clean completion */

	start = ktime_get(); /* start timestamp */

	do {

		t1 = ktime_get(); /* we'll look for a discontinuity */
		t2 = ktime_get();

		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
		diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */

		/* This shouldn't happen */
		if (diff < 0) {
			printk(KERN_ERR BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= data.sample_width);

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > data.threshold) {
		struct sample s;

		/* seqnum doubles as the running total of detections */
		data.count++;
		s.seqnum = data.count;
		s.duration = sample;
		s.timestamp = CURRENT_TIME; /* wall-clock stamp for readers */
		__buffer_add_sample(&s);

		/* Keep a running maximum ever recorded hardware latency */
		if (sample > data.max_sample)
			data.max_sample = sample;
	}

	ret = 0;
out:
	return ret;
}
265
/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 * @unused: A required part of the kthread API.
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * use stop_machine, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus pre-empting).
 * Obviously this should never be used in production environments.
 *
 * stop_machine will schedule us typically only on CPU0 which is fine for
 * almost every real-world hardware latency situation - but we might later
 * generalize this if we find there are any actual systems with alternate
 * SMI delivery or other non CPU0 hardware latencies.
 */
static int kthread_fn(void *unused)
{
	int err = 0;
	u64 interval = 0;

	while (!kthread_should_stop()) {

		mutex_lock(&data.lock);

		err = stop_machine(get_sample, unused, 0);
		if (err) {
			/* Houston, we have a problem */
			mutex_unlock(&data.lock);
			goto err_out;
		}

		wake_up(&data.wq); /* wake up reader(s) */

		/*
		 * Sleep out the idle remainder of the window; convert the
		 * microsecond difference to milliseconds for msleep below.
		 */
		interval = data.sample_window - data.sample_width;
		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		mutex_unlock(&data.lock);

		/* an interrupting signal ends the sampling loop */
		if (msleep_interruptible(interval))
			goto out;
	}
	goto out;
err_out:
	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
	enabled = 0;
out:
	return err;

}
314
315/**
316 * start_kthread - Kick off the hardware latency sampling/detector kthread
317 *
318 * This starts a kernel thread that will sit and sample the CPU timestamp
319 * counter (TSC or similar) and look for potential hardware latencies.
320 */
321static int start_kthread(void)
322{
323 kthread = kthread_run(kthread_fn, NULL,
324 DRVNAME);
325 if (IS_ERR(kthread)) {
326 printk(KERN_ERR BANNER "could not start sampling thread\n");
327 enabled = 0;
328 return -ENOMEM;
329 }
330
331 return 0;
332}
333
334/**
335 * stop_kthread - Inform the hardware latency samping/detector kthread to stop
336 *
337 * This kicks the running hardware latency sampling/detector kernel thread and
338 * tells it to stop sampling now. Use this on unload and at system shutdown.
339 */
340static int stop_kthread(void)
341{
342 int ret;
343
344 ret = kthread_stop(kthread);
345
346 return ret;
347}
348
/**
 * __reset_stats - Reset statistics for the hardware latency detector
 *
 * We use data to store various statistics and global state. We call this
 * function in order to reset those when "enable" is toggled on or off, and
 * also at initialization. Should be called with data.lock held.
 *
 * Note: this also discards any samples still unread in the ring buffer.
 */
static void __reset_stats(void)
{
	data.count = 0;
	data.max_sample = 0;
	ring_buffer_reset(ring_buffer); /* flush out old sample entries */
}
362
363/**
364 * init_stats - Setup global state statistics for the hardware latency detector
365 *
366 * We use data to store various statistics and global state. We also use
367 * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
368 * induced system latencies. This function initializes these structures and
369 * allocates the global ring buffer also.
370 */
371static int init_stats(void)
372{
373 int ret = -ENOMEM;
374
375 mutex_init(&data.lock);
376 init_waitqueue_head(&data.wq);
377 atomic_set(&data.sample_open, 0);
378
379 ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
380
381 if (WARN(!ring_buffer, KERN_ERR BANNER
382 "failed to allocate ring buffer!\n"))
383 goto out;
384
385 __reset_stats();
386 data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */
387 data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
388 data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
389
390 ret = 0;
391
392out:
393 return ret;
394
395}
396
397/*
398 * simple_data_read - Wrapper read function for global state debugfs entries
399 * @filp: The active open file structure for the debugfs "file"
400 * @ubuf: The userspace provided buffer to read value into
401 * @cnt: The maximum number of bytes to read
402 * @ppos: The current "file" position
403 * @entry: The entry to read from
404 *
405 * This function provides a generic read implementation for the global state
406 * "data" structure debugfs filesystem entries. It would be nice to use
407 * simple_attr_read directly, but we need to make sure that the data.lock
408 * spinlock is held during the actual read (even though we likely won't ever
409 * actually race here as the updater runs under a stop_machine context).
410 */
411static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
412 size_t cnt, loff_t *ppos, const u64 *entry)
413{
414 char buf[U64STR_SIZE];
415 u64 val = 0;
416 int len = 0;
417
418 memset(buf, 0, sizeof(buf));
419
420 if (!entry)
421 return -EFAULT;
422
423 mutex_lock(&data.lock);
424 val = *entry;
425 mutex_unlock(&data.lock);
426
427 len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
428
429 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
430
431}
432
433/*
434 * simple_data_write - Wrapper write function for global state debugfs entries
435 * @filp: The active open file structure for the debugfs "file"
436 * @ubuf: The userspace provided buffer to write value from
437 * @cnt: The maximum number of bytes to write
438 * @ppos: The current "file" position
439 * @entry: The entry to write to
440 *
441 * This function provides a generic write implementation for the global state
442 * "data" structure debugfs filesystem entries. It would be nice to use
443 * simple_attr_write directly, but we need to make sure that the data.lock
444 * spinlock is held during the actual write (even though we likely won't ever
445 * actually race here as the updater runs under a stop_machine context).
446 */
447static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
448 size_t cnt, loff_t *ppos, u64 *entry)
449{
450 char buf[U64STR_SIZE];
451 int csize = min(cnt, sizeof(buf));
452 u64 val = 0;
453 int err = 0;
454
455 memset(buf, '\0', sizeof(buf));
456 if (copy_from_user(buf, ubuf, csize))
457 return -EFAULT;
458
459 buf[U64STR_SIZE-1] = '\0'; /* just in case */
460 err = strict_strtoull(buf, 10, &val);
461 if (err)
462 return -EINVAL;
463
464 mutex_lock(&data.lock);
465 *entry = val;
466 mutex_unlock(&data.lock);
467
468 return csize;
469}
470
/**
 * debug_count_fopen - Open function for "count" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "count" debugfs
 * interface to the hardware latency detector. No per-open state is
 * required, so opening always succeeds.
 */
static int debug_count_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
483
/**
 * debug_count_fread - Read function for "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "count" debugfs
 * interface to the hardware latency detector. Can be used to read the
 * number of latency readings exceeding the configured threshold since
 * the detector was last reset (e.g. by writing a zero into "count").
 */
static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	/* delegate to the locked generic u64 reader */
	return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
}
501
/**
 * debug_count_fwrite - Write function for "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "count" debugfs
 * interface to the hardware latency detector. Can be used to write a
 * desired value, especially to zero the total count.
 */
static ssize_t debug_count_fwrite(struct file *filp,
				  const char __user *ubuf,
				  size_t cnt,
				  loff_t *ppos)
{
	/* delegate to the locked generic u64 writer */
	return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
}
520
/**
 * debug_enable_fopen - Dummy open function for "enable" debugfs interface
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "enable" debugfs
 * interface to the hardware latency detector. No per-open state is
 * required, so opening always succeeds.
 */
static int debug_enable_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
533
534/**
535 * debug_enable_fread - Read function for "enable" debugfs interface
536 * @filp: The active open file structure for the debugfs "file"
537 * @ubuf: The userspace provided buffer to read value into
538 * @cnt: The maximum number of bytes to read
539 * @ppos: The current "file" position
540 *
541 * This function provides a read implementation for the "enable" debugfs
542 * interface to the hardware latency detector. Can be used to determine
543 * whether the detector is currently enabled ("0\n" or "1\n" returned).
544 */
545static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
546 size_t cnt, loff_t *ppos)
547{
548 char buf[4];
549
550 if ((cnt < sizeof(buf)) || (*ppos))
551 return 0;
552
553 buf[0] = enabled ? '1' : '0';
554 buf[1] = '\n';
555 buf[2] = '\0';
556 if (copy_to_user(ubuf, buf, strlen(buf)))
557 return -EFAULT;
558 return *ppos = strlen(buf);
559}
560
561/**
562 * debug_enable_fwrite - Write function for "enable" debugfs interface
563 * @filp: The active open file structure for the debugfs "file"
564 * @ubuf: The user buffer that contains the value to write
565 * @cnt: The maximum number of bytes to write to "file"
566 * @ppos: The current position in the debugfs "file"
567 *
568 * This function provides a write implementation for the "enable" debugfs
569 * interface to the hardware latency detector. Can be used to enable or
570 * disable the detector, which will have the side-effect of possibly
571 * also resetting the global stats and kicking off the measuring
572 * kthread (on an enable) or the converse (upon a disable).
573 */
574static ssize_t debug_enable_fwrite(struct file *filp,
575 const char __user *ubuf,
576 size_t cnt,
577 loff_t *ppos)
578{
579 char buf[4];
580 int csize = min(cnt, sizeof(buf));
581 long val = 0;
582 int err = 0;
583
584 memset(buf, '\0', sizeof(buf));
585 if (copy_from_user(buf, ubuf, csize))
586 return -EFAULT;
587
588 buf[sizeof(buf)-1] = '\0'; /* just in case */
589 err = strict_strtoul(buf, 10, &val);
590 if (0 != err)
591 return -EINVAL;
592
593 if (val) {
594 if (enabled)
595 goto unlock;
596 enabled = 1;
597 __reset_stats();
598 if (start_kthread())
599 return -EFAULT;
600 } else {
601 if (!enabled)
602 goto unlock;
603 enabled = 0;
604 err = stop_kthread();
605 if (err) {
606 printk(KERN_ERR BANNER "cannot stop kthread\n");
607 return -EFAULT;
608 }
609 wake_up(&data.wq); /* reader(s) should return */
610 }
611unlock:
612 return csize;
613}
614
/**
 * debug_max_fopen - Open function for "max" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "max" debugfs
 * interface to the hardware latency detector. No per-open state is
 * required, so opening always succeeds.
 */
static int debug_max_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
627
/**
 * debug_max_fread - Read function for "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "max" debugfs
 * interface to the hardware latency detector. Can be used to determine
 * the maximum latency value observed since it was last reset.
 */
static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	/* delegate to the locked generic u64 reader */
	return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
}
644
/**
 * debug_max_fwrite - Write function for "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "max" debugfs
 * interface to the hardware latency detector. Can be used to reset the
 * maximum or set it to some other desired value - if, then, subsequent
 * measurements exceed this value, the maximum will be updated.
 */
static ssize_t debug_max_fwrite(struct file *filp,
				const char __user *ubuf,
				size_t cnt,
				loff_t *ppos)
{
	/* delegate to the locked generic u64 writer */
	return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
}
664
665
666/**
667 * debug_sample_fopen - An open function for "sample" debugfs interface
668 * @inode: The in-kernel inode representation of this debugfs "file"
669 * @filp: The active open file structure for the debugfs "file"
670 *
671 * This function handles opening the "sample" file within the hardware
672 * latency detector debugfs directory interface. This file is used to read
673 * raw samples from the global ring_buffer and allows the user to see a
674 * running latency history. Can be opened blocking or non-blocking,
675 * affecting whether it behaves as a buffer read pipe, or does not.
676 * Implements simple locking to prevent multiple simultaneous use.
677 */
678static int debug_sample_fopen(struct inode *inode, struct file *filp)
679{
680 if (!atomic_add_unless(&data.sample_open, 1, 1))
681 return -EBUSY;
682 else
683 return 0;
684}
685
/**
 * debug_sample_fread - A read function for "sample" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that will contain the samples read
 * @cnt: The maximum bytes to read from the debugfs "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function handles reading from the "sample" file within the hardware
 * latency detector debugfs directory interface. This file is used to read
 * raw samples from the global ring_buffer and allows the user to see a
 * running latency history. By default this will block pending a new
 * value written into the sample buffer, unless there are already a
 * number of value(s) waiting in the buffer, or the sample file was
 * previously opened in a non-blocking mode of operation.
 *
 * Returns the number of bytes copied, 0 on EOF/disable/oversized line,
 * or a negative errno (-ENOMEM, -EAGAIN, -EINTR, -EFAULT).
 */
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	int len = 0;
	char buf[64];
	struct sample *sample = NULL;

	if (!enabled)
		return 0;

	sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
	if (!sample)
		return -ENOMEM;

	/* block until a sample is available, unless opened O_NONBLOCK */
	while (!buffer_get_sample(sample)) {

		DEFINE_WAIT(wait);

		if (filp->f_flags & O_NONBLOCK) {
			len = -EAGAIN;
			goto out;
		}

		/*
		 * NOTE(review): a sample logged between the failed
		 * buffer_get_sample() above and prepare_to_wait() below is
		 * only noticed after the *next* wakeup - confirm this
		 * transient delay is acceptable for readers.
		 */
		prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
		schedule();
		finish_wait(&data.wq, &wait);

		if (signal_pending(current)) {
			len = -EINTR;
			goto out;
		}

		if (!enabled) { /* enable was toggled */
			len = 0;
			goto out;
		}
	}

	/* format: "seconds.nanoseconds<TAB>duration-in-us\n" */
	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
		       sample->timestamp.tv_sec,
		       sample->timestamp.tv_nsec,
		       sample->duration);


	/* handling partial reads is more trouble than it's worth */
	if (len > cnt)
		goto out;

	if (copy_to_user(ubuf, buf, len))
		len = -EFAULT;

out:
	kfree(sample);
	return len;
}
756
/**
 * debug_sample_release - Release function for "sample" debugfs interface
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function completes the close of the debugfs interface "sample" file.
 * Frees the sample_open "lock" (taken in debug_sample_fopen) so that other
 * users may open the interface.
 */
static int debug_sample_release(struct inode *inode, struct file *filp)
{
	atomic_dec(&data.sample_open);

	return 0;
}
771
/**
 * debug_threshold_fopen - Open function for "threshold" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "threshold" debugfs
 * interface to the hardware latency detector. No per-open state is
 * required, so opening always succeeds.
 */
static int debug_threshold_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
784
/**
 * debug_threshold_fread - Read function for "threshold" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "threshold" debugfs
 * interface to the hardware latency detector. It can be used to determine
 * the current threshold level at which a latency will be recorded in the
 * global ring buffer, typically on the order of 10us.
 */
static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
				     size_t cnt, loff_t *ppos)
{
	/* delegate to the locked generic u64 reader */
	return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
}
802
803/**
804 * debug_threshold_fwrite - Write function for "threshold" debugfs entry
805 * @filp: The active open file structure for the debugfs "file"
806 * @ubuf: The user buffer that contains the value to write
807 * @cnt: The maximum number of bytes to write to "file"
808 * @ppos: The current position in the debugfs "file"
809 *
810 * This function provides a write implementation for the "threshold" debugfs
811 * interface to the hardware latency detector. It can be used to configure
812 * the threshold level at which any subsequently detected latencies will
813 * be recorded into the global ring buffer.
814 */
815static ssize_t debug_threshold_fwrite(struct file *filp,
816 const char __user *ubuf,
817 size_t cnt,
818 loff_t *ppos)
819{
820 int ret;
821
822 ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
823
824 if (enabled)
825 wake_up_process(kthread);
826
827 return ret;
828}
829
/**
 * debug_width_fopen - Open function for "width" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "width" debugfs
 * interface to the hardware latency detector. No per-open state is
 * required, so opening always succeeds.
 */
static int debug_width_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
842
/**
 * debug_width_fread - Read function for "width" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "width" debugfs
 * interface to the hardware latency detector. It can be used to determine
 * for how many us of the total window us we will actively sample for any
 * hardware-induced latency periods. Obviously, it is not possible to
 * sample constantly and have the system respond to a sample reader, or,
 * worse, without having the system appear to have gone out to lunch.
 */
static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	/* delegate to the locked generic u64 reader */
	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
}
862
863/**
864 * debug_width_fwrite - Write function for "width" debugfs entry
865 * @filp: The active open file structure for the debugfs "file"
866 * @ubuf: The user buffer that contains the value to write
867 * @cnt: The maximum number of bytes to write to "file"
868 * @ppos: The current position in the debugfs "file"
869 *
870 * This function provides a write implementation for the "width" debugfs
871 * interface to the hardware latency detector. It can be used to configure
872 * for how many us of the total window us we will actively sample for any
873 * hardware-induced latency periods. Obviously, it is not possible to
874 * sample constantly and have the system respond to a sample reader, or,
875 * worse, without having the system appear to have gone out to lunch. It
876 * is enforced that width is less that the total window size.
877 */
878static ssize_t debug_width_fwrite(struct file *filp,
879 const char __user *ubuf,
880 size_t cnt,
881 loff_t *ppos)
882{
883 char buf[U64STR_SIZE];
884 int csize = min(cnt, sizeof(buf));
885 u64 val = 0;
886 int err = 0;
887
888 memset(buf, '\0', sizeof(buf));
889 if (copy_from_user(buf, ubuf, csize))
890 return -EFAULT;
891
892 buf[U64STR_SIZE-1] = '\0'; /* just in case */
893 err = strict_strtoull(buf, 10, &val);
894 if (0 != err)
895 return -EINVAL;
896
897 mutex_lock(&data.lock);
898 if (val < data.sample_window)
899 data.sample_width = val;
900 else {
901 mutex_unlock(&data.lock);
902 return -EINVAL;
903 }
904 mutex_unlock(&data.lock);
905
906 if (enabled)
907 wake_up_process(kthread);
908
909 return csize;
910}
911
/**
 * debug_window_fopen - Open function for "window" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "window" debugfs
 * interface to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs. No per-open state is required, so opening
 * always succeeds.
 */
static int debug_window_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
927
/**
 * debug_window_fread - Read function for "window" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "window" debugfs
 * interface to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs. Can be used to read the total window size.
 */
static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	/* hand off to the shared reader helper for data.sample_window */
	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
}
946
947/**
948 * debug_window_fwrite - Write function for "window" debugfs entry
949 * @filp: The active open file structure for the debugfs "file"
950 * @ubuf: The user buffer that contains the value to write
951 * @cnt: The maximum number of bytes to write to "file"
952 * @ppos: The current position in the debugfs "file"
953 *
954 * This function provides a write implementation for the "window" debufds
955 * interface to the hardware latency detetector. The window is the total time
956 * in us that will be considered one sample period. Conceptually, windows
957 * occur back-to-back and contain a sample width period during which
958 * actual sampling occurs. Can be used to write a new total window size. It
959 * is enfoced that any value written must be greater than the sample width
960 * size, or an error results.
961 */
962static ssize_t debug_window_fwrite(struct file *filp,
963 const char __user *ubuf,
964 size_t cnt,
965 loff_t *ppos)
966{
967 char buf[U64STR_SIZE];
968 int csize = min(cnt, sizeof(buf));
969 u64 val = 0;
970 int err = 0;
971
972 memset(buf, '\0', sizeof(buf));
973 if (copy_from_user(buf, ubuf, csize))
974 return -EFAULT;
975
976 buf[U64STR_SIZE-1] = '\0'; /* just in case */
977 err = strict_strtoull(buf, 10, &val);
978 if (0 != err)
979 return -EINVAL;
980
981 mutex_lock(&data.lock);
982 if (data.sample_width < val)
983 data.sample_window = val;
984 else {
985 mutex_unlock(&data.lock);
986 return -EINVAL;
987 }
988 mutex_unlock(&data.lock);
989
990 return csize;
991}
992
/*
 * Function pointers for the "count" debugfs file operations
 * (registered read-only, mode 0444, in init_debugfs())
 */
static const struct file_operations count_fops = {
	.open = debug_count_fopen,
	.read = debug_count_fread,
	.write = debug_count_fwrite,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "enable" debugfs file operations
 * (registered read-write, mode 0644, in init_debugfs())
 */
static const struct file_operations enable_fops = {
	.open = debug_enable_fopen,
	.read = debug_enable_fread,
	.write = debug_enable_fwrite,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "max" debugfs file operations
 * (registered read-only, mode 0444, in init_debugfs())
 */
static const struct file_operations max_fops = {
	.open = debug_max_fopen,
	.read = debug_max_fread,
	.write = debug_max_fwrite,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "sample" debugfs file operations
 * (registered read-only, mode 0444, in init_debugfs(); note this one
 * has a .release handler instead of a .write handler)
 */
static const struct file_operations sample_fops = {
	.open = debug_sample_fopen,
	.read = debug_sample_fread,
	.release = debug_sample_release,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "threshold" debugfs file operations
 * (registered read-write, mode 0644, in init_debugfs())
 */
static const struct file_operations threshold_fops = {
	.open = debug_threshold_fopen,
	.read = debug_threshold_fread,
	.write = debug_threshold_fwrite,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "width" debugfs file operations
 * (registered read-write, mode 0644, in init_debugfs())
 */
static const struct file_operations width_fops = {
	.open = debug_width_fopen,
	.read = debug_width_fread,
	.write = debug_width_fwrite,
	.owner = THIS_MODULE,
};

/*
 * Function pointers for the "window" debugfs file operations
 * (registered read-write, mode 0644, in init_debugfs())
 */
static const struct file_operations window_fops = {
	.open = debug_window_fopen,
	.read = debug_window_fread,
	.write = debug_window_fwrite,
	.owner = THIS_MODULE,
};
1062
1063/**
1064 * init_debugfs - A function to initialize the debugfs interface files
1065 *
1066 * This function creates entries in debugfs for "hwlat_detector", including
1067 * files to read values from the detector, current samples, and the
1068 * maximum sample that has been captured since the hardware latency
1069 * dectector was started.
1070 */
1071static int init_debugfs(void)
1072{
1073 int ret = -ENOMEM;
1074
1075 debug_dir = debugfs_create_dir(DRVNAME, NULL);
1076 if (!debug_dir)
1077 goto err_debug_dir;
1078
1079 debug_sample = debugfs_create_file("sample", 0444,
1080 debug_dir, NULL,
1081 &sample_fops);
1082 if (!debug_sample)
1083 goto err_sample;
1084
1085 debug_count = debugfs_create_file("count", 0444,
1086 debug_dir, NULL,
1087 &count_fops);
1088 if (!debug_count)
1089 goto err_count;
1090
1091 debug_max = debugfs_create_file("max", 0444,
1092 debug_dir, NULL,
1093 &max_fops);
1094 if (!debug_max)
1095 goto err_max;
1096
1097 debug_sample_window = debugfs_create_file("window", 0644,
1098 debug_dir, NULL,
1099 &window_fops);
1100 if (!debug_sample_window)
1101 goto err_window;
1102
1103 debug_sample_width = debugfs_create_file("width", 0644,
1104 debug_dir, NULL,
1105 &width_fops);
1106 if (!debug_sample_width)
1107 goto err_width;
1108
1109 debug_threshold = debugfs_create_file("threshold", 0644,
1110 debug_dir, NULL,
1111 &threshold_fops);
1112 if (!debug_threshold)
1113 goto err_threshold;
1114
1115 debug_enable = debugfs_create_file("enable", 0644,
1116 debug_dir, &enabled,
1117 &enable_fops);
1118 if (!debug_enable)
1119 goto err_enable;
1120
1121 else {
1122 ret = 0;
1123 goto out;
1124 }
1125
1126err_enable:
1127 debugfs_remove(debug_threshold);
1128err_threshold:
1129 debugfs_remove(debug_sample_width);
1130err_width:
1131 debugfs_remove(debug_sample_window);
1132err_window:
1133 debugfs_remove(debug_max);
1134err_max:
1135 debugfs_remove(debug_count);
1136err_count:
1137 debugfs_remove(debug_sample);
1138err_sample:
1139 debugfs_remove(debug_dir);
1140err_debug_dir:
1141out:
1142 return ret;
1143}
1144
1145/**
1146 * free_debugfs - A function to cleanup the debugfs file interface
1147 */
1148static void free_debugfs(void)
1149{
1150 /* could also use a debugfs_remove_recursive */
1151 debugfs_remove(debug_enable);
1152 debugfs_remove(debug_threshold);
1153 debugfs_remove(debug_sample_width);
1154 debugfs_remove(debug_sample_window);
1155 debugfs_remove(debug_max);
1156 debugfs_remove(debug_count);
1157 debugfs_remove(debug_sample);
1158 debugfs_remove(debug_dir);
1159}
1160
1161/**
1162 * detector_init - Standard module initialization code
1163 */
1164static int detector_init(void)
1165{
1166 int ret = -ENOMEM;
1167
1168 printk(KERN_INFO BANNER "version %s\n", VERSION);
1169
1170 ret = init_stats();
1171 if (0 != ret)
1172 goto out;
1173
1174 ret = init_debugfs();
1175 if (0 != ret)
1176 goto err_stats;
1177
1178 if (enabled)
1179 ret = start_kthread();
1180
1181 goto out;
1182
1183err_stats:
1184 ring_buffer_free(ring_buffer);
1185out:
1186 return ret;
1187
1188}
1189
1190/**
1191 * detector_exit - Standard module cleanup code
1192 */
1193static void detector_exit(void)
1194{
1195 int err;
1196
1197 if (enabled) {
1198 enabled = 0;
1199 err = stop_kthread();
1200 if (err)
1201 printk(KERN_ERR BANNER "cannot stop kthread\n");
1202 }
1203
1204 free_debugfs();
1205 ring_buffer_free(ring_buffer); /* free up the ring buffer */
1206
1207}
1208
/* standard module entry/exit hooks */
module_init(detector_init);
module_exit(detector_exit);