author     Pekka Paalanen <pq@iki.fi>              2008-05-12 15:20:56 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2008-05-24 05:21:14 -0400
commit     8b7d89d02ef3c6a7c73d6596f28cea7632850af4
tree       32601bf4f34dd9e3ec1e9610c555e10dc448006c /arch/x86/kernel/mmiotrace/mmio-mod.c
parent     677aa9f77e8de3791b481a0cec6c8b84d1eec626
x86: mmiotrace - trace memory mapped IO
Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers.

Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. An access to the IO memory then triggers a page fault, which is handled by mmiotrace's custom page fault handler. This handler single-steps the faulting instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debugfs.

This page fault approach is necessary because binary drivers have readl/writel etc. calls inlined and are therefore extremely difficult to trap with e.g. kprobes.

This patch depends on the custom page fault handlers patch.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
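A minimal userspace sketch (not part of this patch) of how the relayed data could be consumed: it drains whatever one per-cpu channel file currently holds and reports the raw byte count. The path assumes debugfs is mounted at /debug, matching the APP_DIR comment in the source below; the record layout (struct mm_io_header followed by data_len payload bytes, from <linux/mmiotrace.h>) is not decoded here.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	long total = 0;
	/* one relay file per cpu: /debug/mmio-trace/cpu0, cpu1, ... */
	int fd = open("/debug/mmio-trace/cpu0", O_RDONLY);

	if (fd < 0) {
		perror("open /debug/mmio-trace/cpu0");
		return 1;
	}
	/* each record is a struct mm_io_header plus its payload;
	   this sketch only counts the raw bytes */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		total += n;
	printf("captured %ld bytes of raw trace data\n", total);
	close(fd);
	return 0;
}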
Diffstat (limited to 'arch/x86/kernel/mmiotrace/mmio-mod.c')
-rw-r--r--  arch/x86/kernel/mmiotrace/mmio-mod.c  |  527
1 file changed, 527 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c
new file mode 100644
index 000000000000..73561fe85f03
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/mmio-mod.c
@@ -0,0 +1,527 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2005
17 * Jeff Muizelaar, 2006, 2007
18 * Pekka Paalanen, 2008 <pq@iki.fi>
19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */
22#include <linux/module.h>
23#include <linux/relay.h>
24#include <linux/debugfs.h>
25#include <linux/proc_fs.h>
26#include <asm/io.h>
27#include <linux/version.h>
28#include <linux/kallsyms.h>
29#include <asm/pgtable.h>
30#include <linux/mmiotrace.h>
31#include <asm/e820.h> /* for ISA_START_ADDRESS */
32
33#include "kmmio.h"
34#include "pf_in.h"
35
36/* This app's relay channel files will appear in /debug/mmio-trace */
37#define APP_DIR "mmio-trace"
38/* the marker injection file in /proc */
39#define MARKER_FILE "mmio-marker"
40
41#define MODULE_NAME "mmiotrace"
42
43struct trap_reason {
44 unsigned long addr;
45 unsigned long ip;
46 enum reason_type type;
47 int active_traces;
48};
49
50static struct trap_reason pf_reason[NR_CPUS];
51static struct mm_io_header_rw cpu_trace[NR_CPUS];
52
53static struct file_operations mmio_fops = {
54 .owner = THIS_MODULE,
55};
56
57static const size_t subbuf_size = 256*1024;
58static struct rchan *chan;
59static struct dentry *dir;
60static int suspended; /* XXX should this be per cpu? */
61static struct proc_dir_entry *proc_marker_file;
62
63/* module parameters */
64static unsigned int n_subbufs = 32*4;
65static unsigned long filter_offset;
66static int nommiotrace;
67static int ISA_trace;
68static int trace_pc;
69
70module_param(n_subbufs, uint, 0);
71module_param(filter_offset, ulong, 0);
72module_param(nommiotrace, bool, 0);
73module_param(ISA_trace, bool, 0);
74module_param(trace_pc, bool, 0);
75
76MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128.");
77MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
78MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
79MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range.");
80MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
81
82static void record_timestamp(struct mm_io_header *header)
83{
84 struct timespec now;
85
86 getnstimeofday(&now);
87 header->sec = now.tv_sec;
88 header->nsec = now.tv_nsec;
89}
90
91/*
92 * Write callback for the /proc entry:
93 * Read a marker and write it to the mmio trace log
94 */
95static int write_marker(struct file *file, const char __user *buffer,
96 unsigned long count, void *data)
97{
98 char *event = NULL;
99 struct mm_io_header *headp;
100 int len = (count > 65535) ? 65535 : count;
101
102 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
103 if (!event)
104 return -ENOMEM;
105
106 headp = (struct mm_io_header *)event;
107 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
108 headp->data_len = len;
109 record_timestamp(headp);
110
111 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
112 kfree(event);
113 return -EFAULT;
114 }
115
116 relay_write(chan, event, sizeof(*headp) + len);
117 kfree(event);
118 return len;
119}
120
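/*
 * Dump the kernel pte for an address (raw value and present bit).
 * Used below by die_kmmio_nesting_error() for diagnostics; large pmd
 * mappings are not handled and trigger a BUG.
 */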
121static void print_pte(unsigned long address)
122{
123 pgd_t *pgd = pgd_offset_k(address);
124 pud_t *pud = pud_offset(pgd, address);
125 pmd_t *pmd = pmd_offset(pud, address);
126 if (pmd_large(*pmd)) {
127 printk(KERN_EMERG MODULE_NAME ": large pages are not "
128 "currently supported: %lx\n",
129 address);
130 BUG();
131 }
132 printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
133 address,
134 pte_val(*pte_offset_kernel(pmd, address)),
135 pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT);
136}
137
138/*
139 * For some reason the pre/post pairs have been called in an
140 * unmatched order. Report and die.
141 */
142static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
143{
144 const unsigned long cpu = smp_processor_id();
145 printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, "
146 "last fault for address: %lx\n",
147 addr, pf_reason[cpu].addr);
148 print_pte(addr);
149#ifdef __i386__
150 print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
151 print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
152 pf_reason[cpu].ip);
153 printk(KERN_EMERG
154 "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
155 regs->ax, regs->bx, regs->cx, regs->dx);
156 printk(KERN_EMERG
157 "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
158 regs->si, regs->di, regs->bp, regs->sp);
159#else
160 print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
161 print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
162 pf_reason[cpu].ip);
163 printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n",
164 regs->ax, regs->cx, regs->dx);
165 printk(KERN_EMERG "rsi: %016lx rdi: %016lx "
166 "rbp: %016lx rsp: %016lx\n",
167 regs->si, regs->di, regs->bp, regs->sp);
168#endif
169 BUG();
170}
171
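/*
 * kmmio pre-handler: called when an access to a traced mapping faults,
 * before the instruction is single-stepped with the page present.
 * Decodes the instruction and fills in this CPU's trace record; for
 * reads the value is only known afterwards, in post().
 */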
172static void pre(struct kmmio_probe *p, struct pt_regs *regs,
173 unsigned long addr)
174{
175 const unsigned long cpu = smp_processor_id();
176 const unsigned long instptr = instruction_pointer(regs);
177 const enum reason_type type = get_ins_type(instptr);
178
179 /* it doesn't make sense to have more than one active trace per cpu */
180 if (pf_reason[cpu].active_traces)
181 die_kmmio_nesting_error(regs, addr);
182 else
183 pf_reason[cpu].active_traces++;
184
185 pf_reason[cpu].type = type;
186 pf_reason[cpu].addr = addr;
187 pf_reason[cpu].ip = instptr;
188
189 cpu_trace[cpu].header.type = MMIO_MAGIC;
190 cpu_trace[cpu].header.pid = 0;
191 cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw);
192 cpu_trace[cpu].rw.address = addr;
193
194 /*
195 * Only record the program counter when requested.
196 * It may taint clean-room reverse engineering.
197 */
198 if (trace_pc)
199 cpu_trace[cpu].rw.pc = instptr;
200 else
201 cpu_trace[cpu].rw.pc = 0;
202
203 record_timestamp(&cpu_trace[cpu].header);
204
205 switch (type) {
206 case REG_READ:
207 cpu_trace[cpu].header.type |=
208 (MMIO_READ << MMIO_OPCODE_SHIFT) |
209 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
210 break;
211 case REG_WRITE:
212 cpu_trace[cpu].header.type |=
213 (MMIO_WRITE << MMIO_OPCODE_SHIFT) |
214 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
215 cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs);
216 break;
217 case IMM_WRITE:
218 cpu_trace[cpu].header.type |=
219 (MMIO_WRITE << MMIO_OPCODE_SHIFT) |
220 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
221 cpu_trace[cpu].rw.value = get_ins_imm_val(instptr);
222 break;
223 default:
224 {
225 unsigned char *ip = (unsigned char *)instptr;
226 cpu_trace[cpu].header.type |=
227 (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT);
228 cpu_trace[cpu].rw.value = (*ip) << 16 |
229 *(ip + 1) << 8 |
230 *(ip + 2);
231 }
232 }
233}
234
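/*
 * kmmio post-handler: called after the faulting instruction has been
 * single-stepped. For reads the destination register now holds the
 * value, so it is picked up here, and the completed record is pushed
 * to the relay channel.
 */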
235static void post(struct kmmio_probe *p, unsigned long condition,
236 struct pt_regs *regs)
237{
238 const unsigned long cpu = smp_processor_id();
239
240 /* this should always return the active_trace count to 0 */
241 pf_reason[cpu].active_traces--;
242 if (pf_reason[cpu].active_traces) {
243 printk(KERN_EMERG MODULE_NAME ": unexpected post handler");
244 BUG();
245 }
246
247 switch (pf_reason[cpu].type) {
248 case REG_READ:
249 cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip,
250 regs);
251 break;
252 default:
253 break;
254 }
255 relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw));
256}
257
258/*
259 * subbuf_start() relay callback.
260 *
261 * Defined so that we know when events are dropped due to the buffer-full
262 * condition.
263 */
264static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
265 void *prev_subbuf, size_t prev_padding)
266{
267 if (relay_buf_full(buf)) {
268 if (!suspended) {
269 suspended = 1;
270 printk(KERN_ERR MODULE_NAME
271 ": cpu %d buffer full!!!\n",
272 smp_processor_id());
273 }
274 return 0;
275 } else if (suspended) {
276 suspended = 0;
277 printk(KERN_ERR MODULE_NAME
278 ": cpu %d buffer no longer full.\n",
279 smp_processor_id());
280 }
281
282 return 1;
283}
284
285/* file_create() callback. Creates relay file in debugfs. */
286static struct dentry *create_buf_file_handler(const char *filename,
287 struct dentry *parent,
288 int mode,
289 struct rchan_buf *buf,
290 int *is_global)
291{
292 struct dentry *buf_file;
293
294 mmio_fops.read = relay_file_operations.read;
295 mmio_fops.open = relay_file_operations.open;
296 mmio_fops.poll = relay_file_operations.poll;
297 mmio_fops.mmap = relay_file_operations.mmap;
298 mmio_fops.release = relay_file_operations.release;
299 mmio_fops.splice_read = relay_file_operations.splice_read;
300
301 buf_file = debugfs_create_file(filename, mode, parent, buf,
302 &mmio_fops);
303
304 return buf_file;
305}
306
307/* file_remove() default callback. Removes relay file in debugfs. */
308static int remove_buf_file_handler(struct dentry *dentry)
309{
310 debugfs_remove(dentry);
311 return 0;
312}
313
314static struct rchan_callbacks relay_callbacks = {
315 .subbuf_start = subbuf_start_handler,
316 .create_buf_file = create_buf_file_handler,
317 .remove_buf_file = remove_buf_file_handler,
318};
319
320/*
321 * create_channel - creates channel /debug/APP_DIR/cpuXXX
322 * Returns channel on success, NULL otherwise
323 */
324static struct rchan *create_channel(unsigned size, unsigned n)
325{
326 return relay_open("cpu", dir, size, n, &relay_callbacks, NULL);
327}
328
329/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */
330static void destroy_channel(void)
331{
332 if (chan) {
333 relay_close(chan);
334 chan = NULL;
335 }
336}
337
338struct remap_trace {
339 struct list_head list;
340 struct kmmio_probe probe;
341};
342static LIST_HEAD(trace_list);
343static DEFINE_SPINLOCK(trace_list_lock);
344
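/*
 * Emit an MMIO_PROBE event for a new mapping, remember it on
 * trace_list and arm a kmmio probe over the whole remapped range
 * (unless tracing is disabled with nommiotrace).
 */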
345static void do_ioremap_trace_core(unsigned long offset, unsigned long size,
346 void __iomem *addr)
347{
348 struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
349 struct mm_io_header_map event = {
350 .header = {
351 .type = MMIO_MAGIC |
352 (MMIO_PROBE << MMIO_OPCODE_SHIFT),
353 .sec = 0,
354 .nsec = 0,
355 .pid = 0,
356 .data_len = sizeof(struct mm_io_map)
357 },
358 .map = {
359 .phys = offset,
360 .addr = (unsigned long)addr,
361 .len = size,
362 .pc = 0
363 }
364 };
365 record_timestamp(&event.header);
366
367 *trace = (struct remap_trace) {
368 .probe = {
369 .addr = (unsigned long)addr,
370 .len = size,
371 .pre_handler = pre,
372 .post_handler = post,
373 }
374 };
375
376 relay_write(chan, &event, sizeof(event));
377 spin_lock(&trace_list_lock);
378 list_add_tail(&trace->list, &trace_list);
379 spin_unlock(&trace_list_lock);
380 if (!nommiotrace)
381 register_kmmio_probe(&trace->probe);
382}
383
384static void ioremap_trace_core(unsigned long offset, unsigned long size,
385 void __iomem *addr)
386{
387 if ((filter_offset) && (offset != filter_offset))
388 return;
389
390 /* Don't trace the low PCI/ISA area, it's always mapped.. */
391 if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
392 (offset + size > ISA_START_ADDRESS)) {
393 printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low "
394 "PCI/ISA area (0x%lx-0x%lx)\n",
395 offset, offset + size);
396 return;
397 }
398 do_ioremap_trace_core(offset, size, addr);
399}
400
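/*
 * Exported wrappers around the ioremap family: perform the real
 * mapping, then hand the result to ioremap_trace_core() so that
 * accesses through it can be trapped.
 */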
401void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
402{
403 void __iomem *p = ioremap_cache(offset, size);
404 printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
405 offset, size, p);
406 ioremap_trace_core(offset, size, p);
407 return p;
408}
409EXPORT_SYMBOL(ioremap_cache_trace);
410
411void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
412{
413 void __iomem *p = ioremap_nocache(offset, size);
414 printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
415 offset, size, p);
416 ioremap_trace_core(offset, size, p);
417 return p;
418}
419EXPORT_SYMBOL(ioremap_nocache_trace);
420
421void iounmap_trace(volatile void __iomem *addr)
422{
423 struct mm_io_header_map event = {
424 .header = {
425 .type = MMIO_MAGIC |
426 (MMIO_UNPROBE << MMIO_OPCODE_SHIFT),
427 .sec = 0,
428 .nsec = 0,
429 .pid = 0,
430 .data_len = sizeof(struct mm_io_map)
431 },
432 .map = {
433 .phys = 0,
434 .addr = (unsigned long)addr,
435 .len = 0,
436 .pc = 0
437 }
438 };
439 struct remap_trace *trace;
440 struct remap_trace *tmp;
441 printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr);
442 record_timestamp(&event.header);
443
444 spin_lock(&trace_list_lock);
445 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
446 if ((unsigned long)addr == trace->probe.addr) {
447 if (!nommiotrace)
448 unregister_kmmio_probe(&trace->probe);
449 list_del(&trace->list);
450 kfree(trace);
451 break;
452 }
453 }
454 spin_unlock(&trace_list_lock);
455 relay_write(chan, &event, sizeof(event));
456 iounmap(addr);
457}
458EXPORT_SYMBOL(iounmap_trace);
459
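/*
 * On module unload, purge any mappings that were never iounmapped:
 * unregister their probes and free the bookkeeping entries.
 */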
460static void clear_trace_list(void)
461{
462 struct remap_trace *trace;
463 struct remap_trace *tmp;
464
465 spin_lock(&trace_list_lock);
466 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
467 printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped "
468 "trace @0x%08lx, size 0x%lx.\n",
469 trace->probe.addr, trace->probe.len);
470 if (!nommiotrace)
471 unregister_kmmio_probe(&trace->probe);
472 list_del(&trace->list);
473 kfree(trace);
475 }
476 spin_unlock(&trace_list_lock);
477}
478
479static int __init init(void)
480{
481 if (n_subbufs < 2)
482 return -EINVAL;
483
484 dir = debugfs_create_dir(APP_DIR, NULL);
485 if (!dir) {
486 printk(KERN_ERR MODULE_NAME
487 ": Couldn't create relay app directory.\n");
488 return -ENOMEM;
489 }
490
491 chan = create_channel(subbuf_size, n_subbufs);
492 if (!chan) {
493 debugfs_remove(dir);
494 printk(KERN_ERR MODULE_NAME
495 ": relay app channel creation failed\n");
496 return -ENOMEM;
497 }
498
499 init_kmmio();
500
501 proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
502 if (proc_marker_file)
503 proc_marker_file->write_proc = write_marker;
504
505 printk(KERN_DEBUG MODULE_NAME ": loaded.\n");
506 if (nommiotrace)
507 printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n");
508 if (ISA_trace)
509 printk(KERN_WARNING MODULE_NAME
510 ": Warning! low ISA range will be traced.\n");
511 return 0;
512}
513
514static void __exit cleanup(void)
515{
516 printk(KERN_DEBUG MODULE_NAME ": unload...\n");
517 clear_trace_list();
518 cleanup_kmmio();
519 remove_proc_entry(MARKER_FILE, NULL);
520 destroy_channel();
521 if (dir)
522 debugfs_remove(dir);
523}
524
525module_init(init);
526module_exit(cleanup);
527MODULE_LICENSE("GPL");
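As an illustration of the marker facility (not part of the patch), a userspace helper could tag the trace stream like this; it assumes the /proc/mmio-marker entry (MARKER_FILE) accepts the write from a sufficiently privileged process:

#include <stdio.h>

int main(void)
{
	/* MARKER_FILE above is "mmio-marker", hence the fixed proc path */
	FILE *f = fopen("/proc/mmio-marker", "w");

	if (!f) {
		perror("fopen /proc/mmio-marker");
		return 1;
	}
	fputs("interesting part starts here", f);
	fclose(f);
	return 0;
}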