aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPekka Paalanen <pq@iki.fi>2008-05-12 15:20:56 -0400
committerThomas Gleixner <tglx@linutronix.de>2008-05-24 05:21:14 -0400
commit8b7d89d02ef3c6a7c73d6596f28cea7632850af4 (patch)
tree32601bf4f34dd9e3ec1e9610c555e10dc448006c
parent677aa9f77e8de3791b481a0cec6c8b84d1eec626 (diff)
x86: mmiotrace - trace memory mapped IO
Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen <pq@iki.fi> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/Kconfig.debug27
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/mmiotrace/Makefile4
-rw-r--r--arch/x86/kernel/mmiotrace/kmmio.c391
-rw-r--r--arch/x86/kernel/mmiotrace/kmmio.h58
-rw-r--r--arch/x86/kernel/mmiotrace/mmio-mod.c527
-rw-r--r--arch/x86/kernel/mmiotrace/pf_in.c489
-rw-r--r--arch/x86/kernel/mmiotrace/pf_in.h39
-rw-r--r--arch/x86/kernel/mmiotrace/testmmiotrace.c77
-rw-r--r--include/linux/mmiotrace.h62
11 files changed, 1677 insertions, 0 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9431a8399844..7c6496e2225e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -176,6 +176,33 @@ config PAGE_FAULT_HANDLERS
176 register a function that is called on every page fault. Custom 176 register a function that is called on every page fault. Custom
177 handlers are used by some debugging and reverse engineering tools. 177 handlers are used by some debugging and reverse engineering tools.
178 178
179config MMIOTRACE
180 tristate "Memory mapped IO tracing"
181 depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS
182 default n
183 help
184 This will build a kernel module called mmiotrace.
185
186 Mmiotrace traces Memory Mapped I/O access and is meant for debugging
187 and reverse engineering. The kernel module offers wrapped
188 versions of the ioremap family of functions. The driver to be traced
189 must be modified to call these wrappers. A user space program is
190 required to collect the MMIO data.
191
192 See http://nouveau.freedesktop.org/wiki/MmioTrace
193 If you are not helping to develop drivers, say N.
194
195config MMIOTRACE_TEST
196 tristate "Test module for mmiotrace"
197 depends on MMIOTRACE && m
198 default n
199 help
200 This is a dumb module for testing mmiotrace. It is very dangerous
201 as it will write garbage to IO memory starting at a given address.
202 However, it should be safe to use on e.g. unused portion of VRAM.
203
204 Say N, unless you absolutely know what you are doing.
205
179# 206#
180# IO delay types: 207# IO delay types:
181# 208#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 739d49acd2f1..a51ac153685e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -79,6 +79,8 @@ obj-$(CONFIG_KGDB) += kgdb.o
79obj-$(CONFIG_VM86) += vm86_32.o 79obj-$(CONFIG_VM86) += vm86_32.o
80obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 80obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
81 81
82obj-$(CONFIG_MMIOTRACE) += mmiotrace/
83
82obj-$(CONFIG_HPET_TIMER) += hpet.o 84obj-$(CONFIG_HPET_TIMER) += hpet.o
83 85
84obj-$(CONFIG_K8_NB) += k8.o 86obj-$(CONFIG_K8_NB) += k8.o
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index a4f93b4120c1..027a5b6a12b2 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,6 +15,7 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ 17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
18EXPORT_SYMBOL_GPL(init_mm);
18 19
19/* 20/*
20 * Initial thread structure. 21 * Initial thread structure.
diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile
new file mode 100644
index 000000000000..d6905f7f981b
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
2mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o
3
4obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
new file mode 100644
index 000000000000..8ba48f9c91b4
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -0,0 +1,391 @@
1/* Support for MMIO probes.
2 * Benefits from much code borrowed from kprobes
3 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
4 * 2007 Alexander Eichner
5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */
7
8#include <linux/version.h>
9#include <linux/spinlock.h>
10#include <linux/hash.h>
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/kernel.h>
15#include <linux/mm.h>
16#include <linux/uaccess.h>
17#include <linux/ptrace.h>
18#include <linux/preempt.h>
19#include <asm/io.h>
20#include <asm/cacheflush.h>
21#include <asm/errno.h>
22#include <asm/tlbflush.h>
23
24#include "kmmio.h"
25
26#define KMMIO_HASH_BITS 6
27#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
28#define KMMIO_PAGE_HASH_BITS 4
29#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
30
31struct kmmio_context {
32 struct kmmio_fault_page *fpage;
33 struct kmmio_probe *probe;
34 unsigned long saved_flags;
35 int active;
36};
37
38static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
39 unsigned long address);
40static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
41 void *args);
42
43static DEFINE_SPINLOCK(kmmio_lock);
44
45/* These are protected by kmmio_lock */
46unsigned int kmmio_count;
47static unsigned int handler_registered;
48static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
49static LIST_HEAD(kmmio_probes);
50
51static struct kmmio_context kmmio_ctx[NR_CPUS];
52
53static struct pf_handler kmmio_pf_hook = {
54 .handler = kmmio_page_fault
55};
56
57static struct notifier_block nb_die = {
58 .notifier_call = kmmio_die_notifier
59};
60
61int init_kmmio(void)
62{
63 int i;
64 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
65 INIT_LIST_HEAD(&kmmio_page_table[i]);
66
67 register_die_notifier(&nb_die);
68 return 0;
69}
70
71void cleanup_kmmio(void)
72{
73 /*
74 * Assume the following have been already cleaned by calling
75 * unregister_kmmio_probe() appropriately:
76 * kmmio_page_table, kmmio_probes
77 */
78 if (handler_registered) {
79 unregister_page_fault_handler(&kmmio_pf_hook);
80 synchronize_rcu();
81 }
82 unregister_die_notifier(&nb_die);
83}
84
85/*
86 * this is basically a dynamic stabbing problem:
87 * Could use the existing prio tree code or
88 * Possible better implementations:
89 * The Interval Skip List: A Data Structure for Finding All Intervals That
90 * Overlap a Point (might be simple)
91 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
92 */
93/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
94static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
95{
96 struct kmmio_probe *p;
97 list_for_each_entry(p, &kmmio_probes, list) {
98 if (addr >= p->addr && addr <= (p->addr + p->len))
99 return p;
100 }
101 return NULL;
102}
103
104static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105{
106 struct list_head *head, *tmp;
107
108 page &= PAGE_MASK;
109 head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
110 list_for_each(tmp, head) {
111 struct kmmio_fault_page *p
112 = list_entry(tmp, struct kmmio_fault_page, list);
113 if (p->page == page)
114 return p;
115 }
116
117 return NULL;
118}
119
120static void arm_kmmio_fault_page(unsigned long page, int *large)
121{
122 unsigned long address = page & PAGE_MASK;
123 pgd_t *pgd = pgd_offset_k(address);
124 pud_t *pud = pud_offset(pgd, address);
125 pmd_t *pmd = pmd_offset(pud, address);
126 pte_t *pte = pte_offset_kernel(pmd, address);
127
128 if (pmd_large(*pmd)) {
129 set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT));
130 if (large)
131 *large = 1;
132 } else {
133 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
134 }
135
136 __flush_tlb_one(page);
137}
138
139static void disarm_kmmio_fault_page(unsigned long page, int *large)
140{
141 unsigned long address = page & PAGE_MASK;
142 pgd_t *pgd = pgd_offset_k(address);
143 pud_t *pud = pud_offset(pgd, address);
144 pmd_t *pmd = pmd_offset(pud, address);
145 pte_t *pte = pte_offset_kernel(pmd, address);
146
147 if (large && *large) {
148 set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT));
149 *large = 0;
150 } else {
151 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
152 }
153
154 __flush_tlb_one(page);
155}
156
157/*
158 * Interrupts are disabled on entry as trap3 is an interrupt gate
159 * and they remain disabled throughout this function.
160 */
161static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
162{
163 struct kmmio_context *ctx;
164 int cpu;
165
166 /*
167 * Preemption is now disabled to prevent process switch during
168 * single stepping. We can only handle one active kmmio trace
169 * per cpu, so ensure that we finish it before something else
170 * gets to run.
171 *
172 * XXX what if an interrupt occurs between returning from
173 * do_page_fault() and entering the single-step exception handler?
174 * And that interrupt triggers a kmmio trap?
175 */
176 preempt_disable();
177 cpu = smp_processor_id();
178 ctx = &kmmio_ctx[cpu];
179
180 /* interrupts disabled and CPU-local data => atomicity guaranteed. */
181 if (ctx->active) {
182 /*
183 * This avoids a deadlock with kmmio_lock.
184 * If this page fault really was due to kmmio trap,
185 * all hell breaks loose.
186 */
187 printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
188 "for address %lu. Ignoring.\n",
189 cpu, addr);
190 goto no_kmmio;
191 }
192 ctx->active++;
193
194 /*
195 * Acquire the kmmio lock to prevent changes affecting
196 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
197 * returned pointers.
198 * The lock is released in post_kmmio_handler().
199 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
200 */
201 spin_lock(&kmmio_lock);
202
203 ctx->fpage = get_kmmio_fault_page(addr);
204 if (!ctx->fpage) {
205 /* this page fault is not caused by kmmio */
206 goto no_kmmio_locked;
207 }
208
209 ctx->probe = get_kmmio_probe(addr);
210 ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
211
212 if (ctx->probe && ctx->probe->pre_handler)
213 ctx->probe->pre_handler(ctx->probe, regs, addr);
214
215 regs->flags |= TF_MASK;
216 regs->flags &= ~IF_MASK;
217
218 /* We hold lock, now we set present bit in PTE and single step. */
219 disarm_kmmio_fault_page(ctx->fpage->page, NULL);
220
221 return 1;
222
223no_kmmio_locked:
224 spin_unlock(&kmmio_lock);
225 ctx->active--;
226no_kmmio:
227 preempt_enable_no_resched();
228 /* page fault not handled by kmmio */
229 return 0;
230}
231
232/*
233 * Interrupts are disabled on entry as trap1 is an interrupt gate
234 * and they remain disabled throughout this function.
235 * And we hold kmmio lock.
236 */
237static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
238{
239 int cpu = smp_processor_id();
240 struct kmmio_context *ctx = &kmmio_ctx[cpu];
241
242 if (!ctx->active)
243 return 0;
244
245 if (ctx->probe && ctx->probe->post_handler)
246 ctx->probe->post_handler(ctx->probe, condition, regs);
247
248 arm_kmmio_fault_page(ctx->fpage->page, NULL);
249
250 regs->flags &= ~TF_MASK;
251 regs->flags |= ctx->saved_flags;
252
253 /* These were acquired in kmmio_handler(). */
254 ctx->active--;
255 spin_unlock(&kmmio_lock);
256 preempt_enable_no_resched();
257
258 /*
259 * if somebody else is singlestepping across a probe point, flags
260 * will have TF set, in which case, continue the remaining processing
261 * of do_debug, as if this is not a probe hit.
262 */
263 if (regs->flags & TF_MASK)
264 return 0;
265
266 return 1;
267}
268
269static int add_kmmio_fault_page(unsigned long page)
270{
271 struct kmmio_fault_page *f;
272
273 page &= PAGE_MASK;
274 f = get_kmmio_fault_page(page);
275 if (f) {
276 f->count++;
277 return 0;
278 }
279
280 f = kmalloc(sizeof(*f), GFP_ATOMIC);
281 if (!f)
282 return -1;
283
284 f->count = 1;
285 f->page = page;
286 list_add(&f->list,
287 &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
288
289 arm_kmmio_fault_page(f->page, NULL);
290
291 return 0;
292}
293
294static void release_kmmio_fault_page(unsigned long page)
295{
296 struct kmmio_fault_page *f;
297
298 page &= PAGE_MASK;
299 f = get_kmmio_fault_page(page);
300 if (!f)
301 return;
302
303 f->count--;
304 if (!f->count) {
305 disarm_kmmio_fault_page(f->page, NULL);
306 list_del(&f->list);
307 }
308}
309
310int register_kmmio_probe(struct kmmio_probe *p)
311{
312 int ret = 0;
313 unsigned long size = 0;
314
315 spin_lock_irq(&kmmio_lock);
316 kmmio_count++;
317 if (get_kmmio_probe(p->addr)) {
318 ret = -EEXIST;
319 goto out;
320 }
321 list_add(&p->list, &kmmio_probes);
322 /*printk("adding fault pages...\n");*/
323 while (size < p->len) {
324 if (add_kmmio_fault_page(p->addr + size))
325 printk(KERN_ERR "mmio: Unable to set page fault.\n");
326 size += PAGE_SIZE;
327 }
328
329 if (!handler_registered) {
330 register_page_fault_handler(&kmmio_pf_hook);
331 handler_registered++;
332 }
333
334out:
335 spin_unlock_irq(&kmmio_lock);
336 /*
337 * XXX: What should I do here?
338 * Here was a call to global_flush_tlb(), but it does not exist
339 * anymore.
340 */
341 return ret;
342}
343
344void unregister_kmmio_probe(struct kmmio_probe *p)
345{
346 unsigned long size = 0;
347
348 spin_lock_irq(&kmmio_lock);
349 while (size < p->len) {
350 release_kmmio_fault_page(p->addr + size);
351 size += PAGE_SIZE;
352 }
353 list_del(&p->list);
354 kmmio_count--;
355 spin_unlock_irq(&kmmio_lock);
356}
357
358/*
359 * According to 2.6.20, mainly x86_64 arch:
360 * This is being called from do_page_fault(), via the page fault notifier
361 * chain. The chain is called for both user space faults and kernel space
362 * faults (address >= TASK_SIZE64), except not on faults serviced by
363 * vmalloc_fault().
364 *
365 * We may be in an interrupt or a critical section. Also prefetching may
366 * trigger a page fault. We may be in the middle of process switch.
367 * The page fault hook functionality has put us inside RCU read lock.
368 *
369 * Local interrupts are disabled, so preemption cannot happen.
370 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
371 */
372static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
373 unsigned long address)
374{
375 if (is_kmmio_active())
376 if (kmmio_handler(regs, address) == 1)
377 return -1;
378 return 0;
379}
380
381static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
382 void *args)
383{
384 struct die_args *arg = args;
385
386 if (val == DIE_DEBUG)
387 if (post_kmmio_handler(arg->err, arg->regs) == 1)
388 return NOTIFY_STOP;
389
390 return NOTIFY_DONE;
391}
diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h
new file mode 100644
index 000000000000..85b7f68a3b8a
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/kmmio.h
@@ -0,0 +1,58 @@
1#ifndef _LINUX_KMMIO_H
2#define _LINUX_KMMIO_H
3
4#include <linux/list.h>
5#include <linux/notifier.h>
6#include <linux/smp.h>
7#include <linux/types.h>
8#include <linux/ptrace.h>
9#include <linux/version.h>
10#include <linux/kdebug.h>
11
12struct kmmio_probe;
13struct kmmio_fault_page;
14struct pt_regs;
15
16typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
17 struct pt_regs *, unsigned long addr);
18typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
19 unsigned long condition, struct pt_regs *);
20
21struct kmmio_probe {
22 struct list_head list;
23
24 /* start location of the probe point */
25 unsigned long addr;
26
27 /* length of the probe region */
28 unsigned long len;
29
30 /* Called before addr is executed. */
31 kmmio_pre_handler_t pre_handler;
32
33 /* Called after addr is executed, unless... */
34 kmmio_post_handler_t post_handler;
35};
36
37struct kmmio_fault_page {
38 struct list_head list;
39
40 /* location of the fault page */
41 unsigned long page;
42
43 int count;
44};
45
46/* kmmio is active by some kmmio_probes? */
47static inline int is_kmmio_active(void)
48{
49 extern unsigned int kmmio_count;
50 return kmmio_count;
51}
52
53int init_kmmio(void);
54void cleanup_kmmio(void);
55int register_kmmio_probe(struct kmmio_probe *p);
56void unregister_kmmio_probe(struct kmmio_probe *p);
57
58#endif /* _LINUX_KMMIO_H */
diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c
new file mode 100644
index 000000000000..73561fe85f03
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/mmio-mod.c
@@ -0,0 +1,527 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2005
17 * Jeff Muizelaar, 2006, 2007
18 * Pekka Paalanen, 2008 <pq@iki.fi>
19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */
22#include <linux/module.h>
23#include <linux/relay.h>
24#include <linux/debugfs.h>
25#include <linux/proc_fs.h>
26#include <asm/io.h>
27#include <linux/version.h>
28#include <linux/kallsyms.h>
29#include <asm/pgtable.h>
30#include <linux/mmiotrace.h>
31#include <asm/e820.h> /* for ISA_START_ADDRESS */
32
33#include "kmmio.h"
34#include "pf_in.h"
35
36/* This app's relay channel files will appear in /debug/mmio-trace */
37#define APP_DIR "mmio-trace"
38/* the marker injection file in /proc */
39#define MARKER_FILE "mmio-marker"
40
41#define MODULE_NAME "mmiotrace"
42
43struct trap_reason {
44 unsigned long addr;
45 unsigned long ip;
46 enum reason_type type;
47 int active_traces;
48};
49
50static struct trap_reason pf_reason[NR_CPUS];
51static struct mm_io_header_rw cpu_trace[NR_CPUS];
52
53static struct file_operations mmio_fops = {
54 .owner = THIS_MODULE,
55};
56
57static const size_t subbuf_size = 256*1024;
58static struct rchan *chan;
59static struct dentry *dir;
60static int suspended; /* XXX should this be per cpu? */
61static struct proc_dir_entry *proc_marker_file;
62
63/* module parameters */
64static unsigned int n_subbufs = 32*4;
65static unsigned long filter_offset;
66static int nommiotrace;
67static int ISA_trace;
68static int trace_pc;
69
70module_param(n_subbufs, uint, 0);
71module_param(filter_offset, ulong, 0);
72module_param(nommiotrace, bool, 0);
73module_param(ISA_trace, bool, 0);
74module_param(trace_pc, bool, 0);
75
76MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128.");
77MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
78MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
79MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range.");
80MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
81
82static void record_timestamp(struct mm_io_header *header)
83{
84 struct timespec now;
85
86 getnstimeofday(&now);
87 header->sec = now.tv_sec;
88 header->nsec = now.tv_nsec;
89}
90
91/*
92 * Write callback for the /proc entry:
93 * Read a marker and write it to the mmio trace log
94 */
95static int write_marker(struct file *file, const char __user *buffer,
96 unsigned long count, void *data)
97{
98 char *event = NULL;
99 struct mm_io_header *headp;
100 int len = (count > 65535) ? 65535 : count;
101
102 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
103 if (!event)
104 return -ENOMEM;
105
106 headp = (struct mm_io_header *)event;
107 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
108 headp->data_len = len;
109 record_timestamp(headp);
110
111 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
112 kfree(event);
113 return -EFAULT;
114 }
115
116 relay_write(chan, event, sizeof(*headp) + len);
117 kfree(event);
118 return len;
119}
120
121static void print_pte(unsigned long address)
122{
123 pgd_t *pgd = pgd_offset_k(address);
124 pud_t *pud = pud_offset(pgd, address);
125 pmd_t *pmd = pmd_offset(pud, address);
126 if (pmd_large(*pmd)) {
127 printk(KERN_EMERG MODULE_NAME ": 4MB pages are not "
128 "currently supported: %lx\n",
129 address);
130 BUG();
131 }
132 printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
133 address,
134 pte_val(*pte_offset_kernel(pmd, address)),
135 pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT);
136}
137
138/*
139 * For some reason the pre/post pairs have been called in an
140 * unmatched order. Report and die.
141 */
142static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
143{
144 const unsigned long cpu = smp_processor_id();
145 printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, "
146 "last fault for address: %lx\n",
147 addr, pf_reason[cpu].addr);
148 print_pte(addr);
149#ifdef __i386__
150 print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
151 print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
152 pf_reason[cpu].ip);
153 printk(KERN_EMERG
154 "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
155 regs->ax, regs->bx, regs->cx, regs->dx);
156 printk(KERN_EMERG
157 "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
158 regs->si, regs->di, regs->bp, regs->sp);
159#else
160 print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
161 print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
162 pf_reason[cpu].ip);
163 printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n",
164 regs->ax, regs->cx, regs->dx);
165 printk(KERN_EMERG "rsi: %016lx rdi: %016lx "
166 "rbp: %016lx rsp: %016lx\n",
167 regs->si, regs->di, regs->bp, regs->sp);
168#endif
169 BUG();
170}
171
172static void pre(struct kmmio_probe *p, struct pt_regs *regs,
173 unsigned long addr)
174{
175 const unsigned long cpu = smp_processor_id();
176 const unsigned long instptr = instruction_pointer(regs);
177 const enum reason_type type = get_ins_type(instptr);
178
179 /* it doesn't make sense to have more than one active trace per cpu */
180 if (pf_reason[cpu].active_traces)
181 die_kmmio_nesting_error(regs, addr);
182 else
183 pf_reason[cpu].active_traces++;
184
185 pf_reason[cpu].type = type;
186 pf_reason[cpu].addr = addr;
187 pf_reason[cpu].ip = instptr;
188
189 cpu_trace[cpu].header.type = MMIO_MAGIC;
190 cpu_trace[cpu].header.pid = 0;
191 cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw);
192 cpu_trace[cpu].rw.address = addr;
193
194 /*
195 * Only record the program counter when requested.
196 * It may taint clean-room reverse engineering.
197 */
198 if (trace_pc)
199 cpu_trace[cpu].rw.pc = instptr;
200 else
201 cpu_trace[cpu].rw.pc = 0;
202
203 record_timestamp(&cpu_trace[cpu].header);
204
205 switch (type) {
206 case REG_READ:
207 cpu_trace[cpu].header.type |=
208 (MMIO_READ << MMIO_OPCODE_SHIFT) |
209 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
210 break;
211 case REG_WRITE:
212 cpu_trace[cpu].header.type |=
213 (MMIO_WRITE << MMIO_OPCODE_SHIFT) |
214 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
215 cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs);
216 break;
217 case IMM_WRITE:
218 cpu_trace[cpu].header.type |=
219 (MMIO_WRITE << MMIO_OPCODE_SHIFT) |
220 (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
221 cpu_trace[cpu].rw.value = get_ins_imm_val(instptr);
222 break;
223 default:
224 {
225 unsigned char *ip = (unsigned char *)instptr;
226 cpu_trace[cpu].header.type |=
227 (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT);
228 cpu_trace[cpu].rw.value = (*ip) << 16 |
229 *(ip + 1) << 8 |
230 *(ip + 2);
231 }
232 }
233}
234
235static void post(struct kmmio_probe *p, unsigned long condition,
236 struct pt_regs *regs)
237{
238 const unsigned long cpu = smp_processor_id();
239
240 /* this should always return the active_trace count to 0 */
241 pf_reason[cpu].active_traces--;
242 if (pf_reason[cpu].active_traces) {
243 printk(KERN_EMERG MODULE_NAME ": unexpected post handler");
244 BUG();
245 }
246
247 switch (pf_reason[cpu].type) {
248 case REG_READ:
249 cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip,
250 regs);
251 break;
252 default:
253 break;
254 }
255 relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw));
256}
257
258/*
259 * subbuf_start() relay callback.
260 *
261 * Defined so that we know when events are dropped due to the buffer-full
262 * condition.
263 */
264static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
265 void *prev_subbuf, size_t prev_padding)
266{
267 if (relay_buf_full(buf)) {
268 if (!suspended) {
269 suspended = 1;
270 printk(KERN_ERR MODULE_NAME
271 ": cpu %d buffer full!!!\n",
272 smp_processor_id());
273 }
274 return 0;
275 } else if (suspended) {
276 suspended = 0;
277 printk(KERN_ERR MODULE_NAME
278 ": cpu %d buffer no longer full.\n",
279 smp_processor_id());
280 }
281
282 return 1;
283}
284
285/* file_create() callback. Creates relay file in debugfs. */
286static struct dentry *create_buf_file_handler(const char *filename,
287 struct dentry *parent,
288 int mode,
289 struct rchan_buf *buf,
290 int *is_global)
291{
292 struct dentry *buf_file;
293
294 mmio_fops.read = relay_file_operations.read;
295 mmio_fops.open = relay_file_operations.open;
296 mmio_fops.poll = relay_file_operations.poll;
297 mmio_fops.mmap = relay_file_operations.mmap;
298 mmio_fops.release = relay_file_operations.release;
299 mmio_fops.splice_read = relay_file_operations.splice_read;
300
301 buf_file = debugfs_create_file(filename, mode, parent, buf,
302 &mmio_fops);
303
304 return buf_file;
305}
306
307/* file_remove() default callback. Removes relay file in debugfs. */
308static int remove_buf_file_handler(struct dentry *dentry)
309{
310 debugfs_remove(dentry);
311 return 0;
312}
313
314static struct rchan_callbacks relay_callbacks = {
315 .subbuf_start = subbuf_start_handler,
316 .create_buf_file = create_buf_file_handler,
317 .remove_buf_file = remove_buf_file_handler,
318};
319
320/*
321 * create_channel - creates channel /debug/APP_DIR/cpuXXX
322 * Returns channel on success, NULL otherwise
323 */
324static struct rchan *create_channel(unsigned size, unsigned n)
325{
326 return relay_open("cpu", dir, size, n, &relay_callbacks, NULL);
327}
328
329/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */
330static void destroy_channel(void)
331{
332 if (chan) {
333 relay_close(chan);
334 chan = NULL;
335 }
336}
337
338struct remap_trace {
339 struct list_head list;
340 struct kmmio_probe probe;
341};
342static LIST_HEAD(trace_list);
343static DEFINE_SPINLOCK(trace_list_lock);
344
345static void do_ioremap_trace_core(unsigned long offset, unsigned long size,
346 void __iomem *addr)
347{
348 struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
349 struct mm_io_header_map event = {
350 .header = {
351 .type = MMIO_MAGIC |
352 (MMIO_PROBE << MMIO_OPCODE_SHIFT),
353 .sec = 0,
354 .nsec = 0,
355 .pid = 0,
356 .data_len = sizeof(struct mm_io_map)
357 },
358 .map = {
359 .phys = offset,
360 .addr = (unsigned long)addr,
361 .len = size,
362 .pc = 0
363 }
364 };
365 record_timestamp(&event.header);
366
367 *trace = (struct remap_trace) {
368 .probe = {
369 .addr = (unsigned long)addr,
370 .len = size,
371 .pre_handler = pre,
372 .post_handler = post,
373 }
374 };
375
376 relay_write(chan, &event, sizeof(event));
377 spin_lock(&trace_list_lock);
378 list_add_tail(&trace->list, &trace_list);
379 spin_unlock(&trace_list_lock);
380 if (!nommiotrace)
381 register_kmmio_probe(&trace->probe);
382}
383
384static void ioremap_trace_core(unsigned long offset, unsigned long size,
385 void __iomem *addr)
386{
387 if ((filter_offset) && (offset != filter_offset))
388 return;
389
390 /* Don't trace the low PCI/ISA area, it's always mapped.. */
391 if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
392 (offset + size > ISA_START_ADDRESS)) {
393 printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low "
394 "PCI/ISA area (0x%lx-0x%lx)\n",
395 offset, offset + size);
396 return;
397 }
398 do_ioremap_trace_core(offset, size, addr);
399}
400
401void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
402{
403 void __iomem *p = ioremap_cache(offset, size);
404 printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
405 offset, size, p);
406 ioremap_trace_core(offset, size, p);
407 return p;
408}
409EXPORT_SYMBOL(ioremap_cache_trace);
410
411void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
412{
413 void __iomem *p = ioremap_nocache(offset, size);
414 printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
415 offset, size, p);
416 ioremap_trace_core(offset, size, p);
417 return p;
418}
419EXPORT_SYMBOL(ioremap_nocache_trace);
420
421void iounmap_trace(volatile void __iomem *addr)
422{
423 struct mm_io_header_map event = {
424 .header = {
425 .type = MMIO_MAGIC |
426 (MMIO_UNPROBE << MMIO_OPCODE_SHIFT),
427 .sec = 0,
428 .nsec = 0,
429 .pid = 0,
430 .data_len = sizeof(struct mm_io_map)
431 },
432 .map = {
433 .phys = 0,
434 .addr = (unsigned long)addr,
435 .len = 0,
436 .pc = 0
437 }
438 };
439 struct remap_trace *trace;
440 struct remap_trace *tmp;
441 printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr);
442 record_timestamp(&event.header);
443
444 spin_lock(&trace_list_lock);
445 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
446 if ((unsigned long)addr == trace->probe.addr) {
447 if (!nommiotrace)
448 unregister_kmmio_probe(&trace->probe);
449 list_del(&trace->list);
450 kfree(trace);
451 break;
452 }
453 }
454 spin_unlock(&trace_list_lock);
455 relay_write(chan, &event, sizeof(event));
456 iounmap(addr);
457}
458EXPORT_SYMBOL(iounmap_trace);
459
460static void clear_trace_list(void)
461{
462 struct remap_trace *trace;
463 struct remap_trace *tmp;
464
465 spin_lock(&trace_list_lock);
466 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
467 printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped "
468 "trace @0x%08lx, size 0x%lx.\n",
469 trace->probe.addr, trace->probe.len);
470 if (!nommiotrace)
471 unregister_kmmio_probe(&trace->probe);
472 list_del(&trace->list);
473 kfree(trace);
474 break;
475 }
476 spin_unlock(&trace_list_lock);
477}
478
479static int __init init(void)
480{
481 if (n_subbufs < 2)
482 return -EINVAL;
483
484 dir = debugfs_create_dir(APP_DIR, NULL);
485 if (!dir) {
486 printk(KERN_ERR MODULE_NAME
487 ": Couldn't create relay app directory.\n");
488 return -ENOMEM;
489 }
490
491 chan = create_channel(subbuf_size, n_subbufs);
492 if (!chan) {
493 debugfs_remove(dir);
494 printk(KERN_ERR MODULE_NAME
495 ": relay app channel creation failed\n");
496 return -ENOMEM;
497 }
498
499 init_kmmio();
500
501 proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
502 if (proc_marker_file)
503 proc_marker_file->write_proc = write_marker;
504
505 printk(KERN_DEBUG MODULE_NAME ": loaded.\n");
506 if (nommiotrace)
507 printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n");
508 if (ISA_trace)
509 printk(KERN_WARNING MODULE_NAME
510 ": Warning! low ISA range will be traced.\n");
511 return 0;
512}
513
514static void __exit cleanup(void)
515{
516 printk(KERN_DEBUG MODULE_NAME ": unload...\n");
517 clear_trace_list();
518 cleanup_kmmio();
519 remove_proc_entry(MARKER_FILE, NULL);
520 destroy_channel();
521 if (dir)
522 debugfs_remove(dir);
523}
524
525module_init(init);
526module_exit(cleanup);
527MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c
new file mode 100644
index 000000000000..67ea520dde62
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/pf_in.c
@@ -0,0 +1,489 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $
23 * Copyright by Intel Crop., 2002
24 * Louis Zhuang (louis.zhuang@intel.com)
25 *
26 * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
27 */
28
29#include <linux/module.h>
30#include <linux/ptrace.h> /* struct pt_regs */
31#include "pf_in.h"
32
#ifdef __i386__
/* IA32 Manual 3, 2-1 */
/* All legal instruction-prefix bytes on ia32.
 * NOTE(review): 0x2E and 0x3E each appear twice — harmless, but
 * redundant. */
static unsigned char prefix_codes[] = {
	0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
	0x65, 0x2E, 0x3E, 0x66, 0x67
};
/* IA32 Manual 3, 3-432*/
/* Opcodes that read memory into a register. Two-byte (0x0F-escaped)
 * opcodes are stored byte-swapped (0xB60F is "0F B6"), matching how
 * get_opcode() loads them via an unsigned short. */
static unsigned int reg_rop[] = {
	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
/* Opcodes that write a register to memory. */
static unsigned int reg_wop[] = { 0x88, 0x89 };
/* Opcodes that write an immediate to memory. */
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
/* IA32 Manual 3, 3-432*/
/* Register-operand width classes: rw8 = always 8-bit, rw32 = 16/32-bit
 * depending on the 0x66 override. */
static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
static unsigned int rw32[] = {
	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
/* Memory-operand width classes (the movzx/movsx forms access memory at
 * the source width, not the register width). */
static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
/* No 64-bit memory widths on ia32; empty initializer is a GNU
 * extension. */
static unsigned int mw64[] = {};
#else /* not __i386__ */
/* Legal prefix bytes on x86-64, including the REX family 0x40-0x4F. */
static unsigned char prefix_codes[] = {
	0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
	0xF0, 0xF3, 0xF2,
	/* REX Prefixes */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
};
/* AMD64 Manual 3, Appendix A*/
static unsigned int reg_rop[] = {
	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
static unsigned int reg_wop[] = { 0x88, 0x89 };
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
static unsigned int rw32[] = {
	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
/* 8 bit only */
static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
/* 16 bit only */
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
/* 16 or 32 bit */
static unsigned int mw32[] = { 0xC7 };
/* 16, 32 or 64 bit */
static unsigned int mw64[] = { 0x89, 0x8B };
#endif /* not __i386__ */
81
82static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
83 int *rexr)
84{
85 int i;
86 unsigned char *p = addr;
87 *shorted = 0;
88 *enlarged = 0;
89 *rexr = 0;
90
91restart:
92 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
93 if (*p == prefix_codes[i]) {
94 if (*p == 0x66)
95 *shorted = 1;
96#ifdef __amd64__
97 if ((*p & 0xf8) == 0x48)
98 *enlarged = 1;
99 if ((*p & 0xf4) == 0x44)
100 *rexr = 1;
101#endif
102 p++;
103 goto restart;
104 }
105 }
106
107 return (p - addr);
108}
109
/*
 * Decode the opcode at addr into *opcode and return its length in
 * bytes. A leading 0x0F escape byte means a two-byte opcode, which is
 * stored as the two raw bytes loaded through an unsigned short (so on
 * little-endian x86 "0F B6" becomes 0xB60F, matching the opcode
 * tables).
 */
static int get_opcode(unsigned char *addr, unsigned int *opcode)
{
	if (*addr != 0x0F) {
		/* plain single-byte opcode */
		*opcode = *addr;
		return 1;
	}

	/* 0x0F extension: take the escape byte and its successor */
	*opcode = *(unsigned short *)addr;
	return 2;
}
125
126#define CHECK_OP_TYPE(opcode, array, type) \
127 for (i = 0; i < ARRAY_SIZE(array); i++) { \
128 if (array[i] == opcode) { \
129 rv = type; \
130 goto exit; \
131 } \
132 }
133
134enum reason_type get_ins_type(unsigned long ins_addr)
135{
136 unsigned int opcode;
137 unsigned char *p;
138 int shorted, enlarged, rexr;
139 int i;
140 enum reason_type rv = OTHERS;
141
142 p = (unsigned char *)ins_addr;
143 p += skip_prefix(p, &shorted, &enlarged, &rexr);
144 p += get_opcode(p, &opcode);
145
146 CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
147 CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
148 CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
149
150exit:
151 return rv;
152}
153#undef CHECK_OP_TYPE
154
155static unsigned int get_ins_reg_width(unsigned long ins_addr)
156{
157 unsigned int opcode;
158 unsigned char *p;
159 int i, shorted, enlarged, rexr;
160
161 p = (unsigned char *)ins_addr;
162 p += skip_prefix(p, &shorted, &enlarged, &rexr);
163 p += get_opcode(p, &opcode);
164
165 for (i = 0; i < ARRAY_SIZE(rw8); i++)
166 if (rw8[i] == opcode)
167 return 1;
168
169 for (i = 0; i < ARRAY_SIZE(rw32); i++)
170 if (rw32[i] == opcode)
171 return (shorted ? 2 : (enlarged ? 8 : 4));
172
173 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
174 return 0;
175}
176
177unsigned int get_ins_mem_width(unsigned long ins_addr)
178{
179 unsigned int opcode;
180 unsigned char *p;
181 int i, shorted, enlarged, rexr;
182
183 p = (unsigned char *)ins_addr;
184 p += skip_prefix(p, &shorted, &enlarged, &rexr);
185 p += get_opcode(p, &opcode);
186
187 for (i = 0; i < ARRAY_SIZE(mw8); i++)
188 if (mw8[i] == opcode)
189 return 1;
190
191 for (i = 0; i < ARRAY_SIZE(mw16); i++)
192 if (mw16[i] == opcode)
193 return 2;
194
195 for (i = 0; i < ARRAY_SIZE(mw32); i++)
196 if (mw32[i] == opcode)
197 return shorted ? 2 : 4;
198
199 for (i = 0; i < ARRAY_SIZE(mw64); i++)
200 if (mw64[i] == opcode)
201 return shorted ? 2 : (enlarged ? 8 : 4);
202
203 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
204 return 0;
205}
206
/*
 * Define register ident in mod/rm byte.
 * Note: these are NOT the same as in ptrace-abi.h.
 */
enum {
	/* 8-bit encodings (ModRM.reg values 0-7). */
	arg_AL = 0,
	arg_CL = 1,
	arg_DL = 2,
	arg_BL = 3,
	/* NOTE(review): with a REX prefix present, encodings 4-7 select
	 * SPL/BPL/SIL/DIL on x86-64, not AH/CH/DH/BH — confirm whether
	 * get_reg_w8() can be reached for REX-prefixed byte ops. */
	arg_AH = 4,
	arg_CH = 5,
	arg_DH = 6,
	arg_BH = 7,

	/* Word/dword/qword encodings (same numeric space as above). */
	arg_AX = 0,
	arg_CX = 1,
	arg_DX = 2,
	arg_BX = 3,
	arg_SP = 4,
	arg_BP = 5,
	arg_SI = 6,
	arg_DI = 7,
#ifdef __amd64__
	/* 8-15 require REX.R and only exist on x86-64. */
	arg_R8 = 8,
	arg_R9 = 9,
	arg_R10 = 10,
	arg_R11 = 11,
	arg_R12 = 12,
	arg_R13 = 13,
	arg_R14 = 14,
	arg_R15 = 15
#endif
};
240
241static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
242{
243 unsigned char *rv = NULL;
244
245 switch (no) {
246 case arg_AL:
247 rv = (unsigned char *)&regs->ax;
248 break;
249 case arg_BL:
250 rv = (unsigned char *)&regs->bx;
251 break;
252 case arg_CL:
253 rv = (unsigned char *)&regs->cx;
254 break;
255 case arg_DL:
256 rv = (unsigned char *)&regs->dx;
257 break;
258 case arg_AH:
259 rv = 1 + (unsigned char *)&regs->ax;
260 break;
261 case arg_BH:
262 rv = 1 + (unsigned char *)&regs->bx;
263 break;
264 case arg_CH:
265 rv = 1 + (unsigned char *)&regs->cx;
266 break;
267 case arg_DH:
268 rv = 1 + (unsigned char *)&regs->dx;
269 break;
270#ifdef __amd64__
271 case arg_R8:
272 rv = (unsigned char *)&regs->r8;
273 break;
274 case arg_R9:
275 rv = (unsigned char *)&regs->r9;
276 break;
277 case arg_R10:
278 rv = (unsigned char *)&regs->r10;
279 break;
280 case arg_R11:
281 rv = (unsigned char *)&regs->r11;
282 break;
283 case arg_R12:
284 rv = (unsigned char *)&regs->r12;
285 break;
286 case arg_R13:
287 rv = (unsigned char *)&regs->r13;
288 break;
289 case arg_R14:
290 rv = (unsigned char *)&regs->r14;
291 break;
292 case arg_R15:
293 rv = (unsigned char *)&regs->r15;
294 break;
295#endif
296 default:
297 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
298 break;
299 }
300 return rv;
301}
302
303static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
304{
305 unsigned long *rv = NULL;
306
307 switch (no) {
308 case arg_AX:
309 rv = &regs->ax;
310 break;
311 case arg_BX:
312 rv = &regs->bx;
313 break;
314 case arg_CX:
315 rv = &regs->cx;
316 break;
317 case arg_DX:
318 rv = &regs->dx;
319 break;
320 case arg_SP:
321 rv = &regs->sp;
322 break;
323 case arg_BP:
324 rv = &regs->bp;
325 break;
326 case arg_SI:
327 rv = &regs->si;
328 break;
329 case arg_DI:
330 rv = &regs->di;
331 break;
332#ifdef __amd64__
333 case arg_R8:
334 rv = &regs->r8;
335 break;
336 case arg_R9:
337 rv = &regs->r9;
338 break;
339 case arg_R10:
340 rv = &regs->r10;
341 break;
342 case arg_R11:
343 rv = &regs->r11;
344 break;
345 case arg_R12:
346 rv = &regs->r12;
347 break;
348 case arg_R13:
349 rv = &regs->r13;
350 break;
351 case arg_R14:
352 rv = &regs->r14;
353 break;
354 case arg_R15:
355 rv = &regs->r15;
356 break;
357#endif
358 default:
359 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
360 }
361
362 return rv;
363}
364
365unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
366{
367 unsigned int opcode;
368 unsigned char mod_rm;
369 int reg;
370 unsigned char *p;
371 int i, shorted, enlarged, rexr;
372 unsigned long rv;
373
374 p = (unsigned char *)ins_addr;
375 p += skip_prefix(p, &shorted, &enlarged, &rexr);
376 p += get_opcode(p, &opcode);
377 for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
378 if (reg_rop[i] == opcode) {
379 rv = REG_READ;
380 goto do_work;
381 }
382
383 for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
384 if (reg_wop[i] == opcode) {
385 rv = REG_WRITE;
386 goto do_work;
387 }
388
389 printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
390 "0x%02x\n", opcode);
391 goto err;
392
393do_work:
394 mod_rm = *p;
395 reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
396 switch (get_ins_reg_width(ins_addr)) {
397 case 1:
398 return *get_reg_w8(reg, regs);
399
400 case 2:
401 return *(unsigned short *)get_reg_w32(reg, regs);
402
403 case 4:
404 return *(unsigned int *)get_reg_w32(reg, regs);
405
406#ifdef __amd64__
407 case 8:
408 return *(unsigned long *)get_reg_w32(reg, regs);
409#endif
410
411 default:
412 printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
413 }
414
415err:
416 return 0;
417}
418
419unsigned long get_ins_imm_val(unsigned long ins_addr)
420{
421 unsigned int opcode;
422 unsigned char mod_rm;
423 unsigned char mod;
424 unsigned char *p;
425 int i, shorted, enlarged, rexr;
426 unsigned long rv;
427
428 p = (unsigned char *)ins_addr;
429 p += skip_prefix(p, &shorted, &enlarged, &rexr);
430 p += get_opcode(p, &opcode);
431 for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
432 if (imm_wop[i] == opcode) {
433 rv = IMM_WRITE;
434 goto do_work;
435 }
436
437 printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
438 "0x%02x\n", opcode);
439 goto err;
440
441do_work:
442 mod_rm = *p;
443 mod = mod_rm >> 6;
444 p++;
445 switch (mod) {
446 case 0:
447 /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */
448 /* AMD64: XXX Check for address size prefix? */
449 if ((mod_rm & 0x7) == 0x5)
450 p += 4;
451 break;
452
453 case 1:
454 p += 1;
455 break;
456
457 case 2:
458 p += 4;
459 break;
460
461 case 3:
462 default:
463 printk(KERN_ERR "mmiotrace: not a memory access instruction "
464 "at 0x%lx, rm_mod=0x%02x\n",
465 ins_addr, mod_rm);
466 }
467
468 switch (get_ins_reg_width(ins_addr)) {
469 case 1:
470 return *(unsigned char *)p;
471
472 case 2:
473 return *(unsigned short *)p;
474
475 case 4:
476 return *(unsigned int *)p;
477
478#ifdef __amd64__
479 case 8:
480 return *(unsigned long *)p;
481#endif
482
483 default:
484 printk(KERN_ERR "mmiotrace: Error: width.\n");
485 }
486
487err:
488 return 0;
489}
diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h
new file mode 100644
index 000000000000..e05341a51a27
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/pf_in.h
@@ -0,0 +1,39 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
#ifndef __PF_H_
#define __PF_H_

/*
 * Bugfix: get_ins_reg_val() mentions struct pt_regs; without a prior
 * declaration the tag would be scoped to that prototype only. Forward
 * declaring it keeps this header self-contained for any includer.
 */
struct pt_regs;

/* Classification of the faulting instruction. */
enum reason_type {
	NOT_ME,		/* page fault is not in regions */
	NOTHING,	/* access others point in regions */
	REG_READ,	/* read from addr to reg */
	REG_WRITE,	/* write from reg to addr */
	IMM_WRITE,	/* write from imm to addr */
	OTHERS		/* Other instructions can not intercept */
};

enum reason_type get_ins_type(unsigned long ins_addr);
unsigned int get_ins_mem_width(unsigned long ins_addr);
unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
unsigned long get_ins_imm_val(unsigned long ins_addr);

#endif /* __PF_H_ */
diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c
new file mode 100644
index 000000000000..40e66b0e6480
--- /dev/null
+++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c
@@ -0,0 +1,77 @@
1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi>
3 */
4#include <linux/module.h>
5#include <asm/io.h>
6
/* Traced ioremap wrappers (exported by mmio-mod.c). */
extern void __iomem *ioremap_nocache_trace(unsigned long offset,
						unsigned long size);
extern void iounmap_trace(volatile void __iomem *addr);

#define MODULE_NAME "testmmiotrace"

/* Physical base address of the 16 kB window to map and exercise;
 * mandatory module argument. */
static unsigned long mmio_address;
module_param(mmio_address, ulong, 0);
MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
16
17static void do_write_test(void __iomem *p)
18{
19 unsigned int i;
20 for (i = 0; i < 256; i++)
21 iowrite8(i, p + i);
22 for (i = 1024; i < (5 * 1024); i += 2)
23 iowrite16(i * 12 + 7, p + i);
24 for (i = (5 * 1024); i < (16 * 1024); i += 4)
25 iowrite32(i * 212371 + 13, p + i);
26}
27
28static void do_read_test(void __iomem *p)
29{
30 unsigned int i;
31 volatile unsigned int v;
32 for (i = 0; i < 256; i++)
33 v = ioread8(p + i);
34 for (i = 1024; i < (5 * 1024); i += 2)
35 v = ioread16(p + i);
36 for (i = (5 * 1024); i < (16 * 1024); i += 4)
37 v = ioread32(p + i);
38}
39
40static void do_test(void)
41{
42 void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000);
43 if (!p) {
44 printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, "
45 "aborting.\n");
46 return;
47 }
48 do_write_test(p);
49 do_read_test(p);
50 iounmap_trace(p);
51}
52
/*
 * Module init: refuse to load without an mmio_address, then run the
 * destructive read/write test once over the 16 kB window starting at
 * that physical address. Always returns 0 once the test has run.
 */
static int __init init(void)
{
	if (mmio_address == 0) {
		printk(KERN_ERR MODULE_NAME ": you have to use the module "
						"argument mmio_address.\n");
		printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
				" YOU REALLY KNOW WHAT YOU ARE DOING!\n");
		return -ENXIO;
	}

	printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
					"in PCI address space, and writing "
					"rubbish in there.\n", mmio_address);
	do_test();
	return 0;
}
69
/* Nothing to undo: do_test() already unmapped its mapping in init(). */
static void __exit cleanup(void)
{
	printk(KERN_DEBUG MODULE_NAME ": unloaded.\n");
}
74
75module_init(init);
76module_exit(cleanup);
77MODULE_LICENSE("GPL");
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
new file mode 100644
index 000000000000..cb247825f3ec
--- /dev/null
+++ b/include/linux/mmiotrace.h
@@ -0,0 +1,62 @@
#ifndef MMIOTRACE_H
#define MMIOTRACE_H

#include <asm/types.h>

/* Log format version, embedded in MMIO_MAGIC below. */
#define MMIO_VERSION 0x04

/* mm_io_header.type bit layout: low byte = opcode, next byte = access
 * width, high 16 bits = magic/version. */
/* mm_io_header.type */
#define MMIO_OPCODE_MASK 0xff
#define MMIO_OPCODE_SHIFT 0
#define MMIO_WIDTH_MASK 0xff00
#define MMIO_WIDTH_SHIFT 8
#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16))
#define MMIO_MAGIC_MASK 0xffff0000

enum mm_io_opcode {	/* payload type: */
	MMIO_READ = 0x1,	/* struct mm_io_rw */
	MMIO_WRITE = 0x2,	/* struct mm_io_rw */
	MMIO_PROBE = 0x3,	/* struct mm_io_map */
	MMIO_UNPROBE = 0x4,	/* struct mm_io_map */
	MMIO_MARKER = 0x5,	/* raw char data */
	MMIO_UNKNOWN_OP = 0x6,	/* struct mm_io_rw */
};

/* Common header preceding every payload in the relay stream.
 * NOTE(review): this struct is not packed on its own; its sizeof may
 * include tail padding after data_len on most ABIs — confirm user-space
 * readers account for that when the header is written standalone. */
struct mm_io_header {
	__u32 type;
	__u32 sec;	/* timestamp */
	__u32 nsec;
	__u32 pid;	/* PID of the process, or 0 for kernel core */
	__u16 data_len;	/* length of the following payload */
};

/* Payload for MMIO_READ/MMIO_WRITE/MMIO_UNKNOWN_OP. */
struct mm_io_rw {
	__u64 address;	/* virtual address of register */
	__u64 value;
	__u64 pc;	/* optional program counter */
};

/* Payload for MMIO_PROBE/MMIO_UNPROBE. */
struct mm_io_map {
	__u64 phys;	/* base address in PCI space */
	__u64 addr;	/* base virtual address */
	__u64 len;	/* mapping size */
	__u64 pc;	/* optional program counter */
};


/*
 * These structures are used to allow a single relay_write()
 * call to write a full packet.
 */

struct mm_io_header_rw {
	struct mm_io_header header;
	struct mm_io_rw rw;
} __attribute__((packed));

struct mm_io_header_map {
	struct mm_io_header header;
	struct mm_io_map map;
} __attribute__((packed));

#endif /* MMIOTRACE_H */