aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-10 05:43:06 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-10 05:43:06 -0400
commit5373fdbdc1dba69aa956098650f71b731d471885 (patch)
tree8d9f07539896a696352818820c9c5f6612370882
parentbac0c9103b31c3dd83ad9d731dd9834e2ba75e4f (diff)
parent4d51c7587bb13dbb2fafcad6c0b5231bd864b55f (diff)
Merge branch 'tracing/mmiotrace' into auto-ftrace-next
-rw-r--r--Documentation/tracers/mmiotrace.txt164
-rw-r--r--arch/x86/Kconfig.debug32
-rw-r--r--arch/x86/mm/Makefile5
-rw-r--r--arch/x86/mm/fault.c57
-rw-r--r--arch/x86/mm/ioremap.c11
-rw-r--r--arch/x86/mm/kmmio.c510
-rw-r--r--arch/x86/mm/mmio-mod.c515
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/pf_in.c489
-rw-r--r--arch/x86/mm/pf_in.h39
-rw-r--r--arch/x86/mm/testmmiotrace.c71
-rw-r--r--include/asm-x86/kdebug.h9
-rw-r--r--include/linux/mmiotrace.h85
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.c42
-rw-r--r--kernel/trace/trace.h14
-rw-r--r--kernel/trace/trace_mmiotrace.c295
17 files changed, 2274 insertions, 66 deletions
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
new file mode 100644
index 000000000000..a4afb560a45b
--- /dev/null
+++ b/Documentation/tracers/mmiotrace.txt
@@ -0,0 +1,164 @@
1 In-kernel memory-mapped I/O tracing
2
3
4Home page and links to optional user space tools:
5
6 http://nouveau.freedesktop.org/wiki/MmioTrace
7
8MMIO tracing was originally developed by Intel around 2003 for their Fault
9Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel,
10Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau
11project in mind. Since then many people have contributed.
12
13Mmiotrace was built for reverse engineering any memory-mapped IO device with
14the Nouveau project as the first real user. Only x86 and x86_64 architectures
15are supported.
16
17Out-of-tree mmiotrace was originally modified for mainline inclusion and
18ftrace framework by Pekka Paalanen <pq@iki.fi>.
19
20
21Preparation
22-----------
23
24Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is
25disabled by default, so it is safe to have this set to yes. SMP systems are
26supported, but tracing is unreliable and may miss events if more than one CPU
27is on-line, therefore mmiotrace takes all but one CPU off-line during run-time
28activation. You can re-enable CPUs by hand, but you have been warned, there
29is no way to automatically detect if you are losing events due to CPUs racing.
30
31
32Usage Quick Reference
33---------------------
34
35$ mount -t debugfs debugfs /debug
36$ echo mmiotrace > /debug/tracing/current_tracer
37$ cat /debug/tracing/trace_pipe > mydump.txt &
38Start X or whatever.
39$ echo "X is up" > /debug/tracing/marker
40$ echo none > /debug/tracing/current_tracer
41Check for lost events.
42
43
44Usage
45-----
46
47Make sure debugfs is mounted to /debug. If not, (requires root privileges)
48$ mount -t debugfs debugfs /debug
49
50Check that the driver you are about to trace is not loaded.
51
52Activate mmiotrace (requires root privileges):
53$ echo mmiotrace > /debug/tracing/current_tracer
54
55Start storing the trace:
56$ cat /debug/tracing/trace_pipe > mydump.txt &
57The 'cat' process should stay running (sleeping) in the background.
58
59Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
60accesses to areas that are ioremapped while mmiotrace is active.
61
62[Unimplemented feature:]
63During tracing you can place comments (markers) into the trace by
64$ echo "X is up" > /debug/tracing/marker
65This makes it easier to see which part of the (huge) trace corresponds to
66which action. It is recommended to place descriptive markers about what you
67do.
68
69Shut down mmiotrace (requires root privileges):
70$ echo none > /debug/tracing/current_tracer
71The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
72pressing ctrl+c.
73
74Check that mmiotrace did not lose events due to a buffer filling up. Either
75$ grep -i lost mydump.txt
76which tells you exactly how many events were lost, or use
77$ dmesg
78to view your kernel log and look for "mmiotrace has lost events" warning. If
79events were lost, the trace is incomplete. You should enlarge the buffers and
80try again. Buffers are enlarged by first seeing how large the current buffers
81are:
82$ cat /debug/tracing/trace_entries
83gives you a number. Approximately double this number and write it back, for
84instance:
85$ echo 128000 > /debug/tracing/trace_entries
86Then start again from the top.
87
88If you are doing a trace for a driver project, e.g. Nouveau, you should also
89do the following before sending your results:
90$ lspci -vvv > lspci.txt
91$ dmesg > dmesg.txt
92$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
93and then send the .tar.gz file. The trace compresses considerably. Replace
94"pciid" and "nick" with the PCI ID or model name of your piece of hardware
95under investigation and your nick name.
96
97
98How Mmiotrace Works
99-------------------
100
101Access to hardware IO-memory is gained by mapping addresses from PCI bus by
102calling one of the ioremap_*() functions. Mmiotrace is hooked into the
103__ioremap() function and gets called whenever a mapping is created. Mapping is
104an event that is recorded into the trace log. Note, that ISA range mappings
105are not caught, since the mapping always exists and is returned directly.
106
107MMIO accesses are recorded via page faults. Just before __ioremap() returns,
108the mapped pages are marked as not present. Any access to the pages causes a
109fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace
110marks the page present, sets TF flag to achieve single stepping and exits the
111fault handler. The instruction that faulted is executed and debug trap is
112entered. Here mmiotrace again marks the page as not present. The instruction
113is decoded to get the type of operation (read/write), data width and the value
114read or written. These are stored to the trace log.
115
116Setting the page present in the page fault handler has a race condition on SMP
117machines. During the single stepping other CPUs may run freely on that page
118and events can be missed without a notice. Re-enabling other CPUs during
119tracing is discouraged.
120
121
122Trace Log Format
123----------------
124
125The raw log is text and easily filtered with e.g. grep and awk. One record is
126one line in the log. A record starts with a keyword, followed by keyword
127dependant arguments. Arguments are separated by a space, or continue until the
128end of line. The format for version 20070824 is as follows:
129
130Explanation Keyword Space separated arguments
131---------------------------------------------------------------------------
132
133read event R width, timestamp, map id, physical, value, PC, PID
134write event W width, timestamp, map id, physical, value, PC, PID
135ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID
136iounmap event UNMAP timestamp, map id, PC, PID
137marker MARK timestamp, text
138version VERSION the string "20070824"
139info for reader LSPCI one line from lspci -v
140PCI address map PCIDEV space separated /proc/bus/pci/devices data
141unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
142
143Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
144is a kernel virtual address. Width is the data width in bytes and value is the
145data value. Map id is an arbitrary id number identifying the mapping that was
146used in an operation. PC is the program counter and PID is process id. PC is
147zero if it is not recorded. PID is always zero as tracing MMIO accesses
148originating in user space memory is not yet supported.
149
150For instance, the following awk filter will pass all 32-bit writes that target
151physical addresses in the range [0xfb73ce40, 0xfb800000[
152
153$ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 &&
154adr < 0xfb800000) print; }'
155
156
157Tools for Developers
158--------------------
159
160The user space tools include utilities for:
161- replacing numeric addresses and values with hardware register names
162- replaying MMIO logs, i.e., re-executing the recorded writes
163
164
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f395fd537c5c..f7169edfbeab 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -172,13 +172,33 @@ config IOMMU_LEAK
172 Add a simple leak tracer to the IOMMU code. This is useful when you 172 Add a simple leak tracer to the IOMMU code. This is useful when you
173 are debugging a buggy device driver that leaks IOMMU mappings. 173 are debugging a buggy device driver that leaks IOMMU mappings.
174 174
175config PAGE_FAULT_HANDLERS 175config MMIOTRACE_HOOKS
176 bool "Custom page fault handlers" 176 bool
177 depends on DEBUG_KERNEL 177
178config MMIOTRACE
179 bool "Memory mapped IO tracing"
180 depends on DEBUG_KERNEL && PCI
181 select TRACING
182 select MMIOTRACE_HOOKS
183 default y
184 help
185 Mmiotrace traces Memory Mapped I/O access and is meant for
186 debugging and reverse engineering. It is called from the ioremap
187 implementation and works via page faults. Tracing is disabled by
188 default and can be enabled at run-time.
189
190 See Documentation/tracers/mmiotrace.txt.
191 If you are not helping to develop drivers, say N.
192
193config MMIOTRACE_TEST
194 tristate "Test module for mmiotrace"
195 depends on MMIOTRACE && m
178 help 196 help
179 Allow the use of custom page fault handlers. A kernel module may 197 This is a dumb module for testing mmiotrace. It is very dangerous
180 register a function that is called on every page fault. Custom 198 as it will write garbage to IO memory starting at a given address.
181 handlers are used by some debugging and reverse engineering tools. 199 However, it should be safe to use on e.g. unused portion of VRAM.
200
201 Say N, unless you absolutely know what you are doing.
182 202
183# 203#
184# IO delay types: 204# IO delay types:
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index b7b3e4c7cfc9..07dab503c9e3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15
11ifeq ($(CONFIG_X86_32),y) 16ifeq ($(CONFIG_X86_32),y)
12obj-$(CONFIG_NUMA) += discontig_32.o 17obj-$(CONFIG_NUMA) += discontig_32.o
13else 18else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 42394b353c6a..0a778e3c43ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/mmiotrace.h>
13#include <linux/mman.h> 14#include <linux/mman.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/smp.h> 16#include <linux/smp.h>
@@ -49,58 +50,14 @@
49#define PF_RSVD (1<<3) 50#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 51#define PF_INSTR (1<<4)
51 52
52#ifdef CONFIG_PAGE_FAULT_HANDLERS 53static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
53static HLIST_HEAD(pf_handlers); /* protected by RCU */
54static DEFINE_SPINLOCK(pf_handlers_writer);
55
56void register_page_fault_handler(struct pf_handler *new_pfh)
57{
58 unsigned long flags;
59 spin_lock_irqsave(&pf_handlers_writer, flags);
60 hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
61 spin_unlock_irqrestore(&pf_handlers_writer, flags);
62}
63EXPORT_SYMBOL_GPL(register_page_fault_handler);
64
65/**
66 * unregister_page_fault_handler:
67 * The caller must ensure @old_pfh is not in use anymore before freeing it.
68 * This function does not guarantee it. The list of handlers is protected by
69 * RCU, so you can do this by e.g. calling synchronize_rcu().
70 */
71void unregister_page_fault_handler(struct pf_handler *old_pfh)
72{ 54{
73 unsigned long flags; 55#ifdef CONFIG_MMIOTRACE_HOOKS
74 spin_lock_irqsave(&pf_handlers_writer, flags); 56 if (unlikely(is_kmmio_active()))
75 hlist_del_rcu(&old_pfh->hlist); 57 if (kmmio_handler(regs, addr) == 1)
76 spin_unlock_irqrestore(&pf_handlers_writer, flags); 58 return -1;
77}
78EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
79#endif 59#endif
80
81/* returns non-zero if do_page_fault() should return */
82static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
83 unsigned long address)
84{
85#ifdef CONFIG_PAGE_FAULT_HANDLERS
86 int ret = 0;
87 struct pf_handler *cur;
88 struct hlist_node *ncur;
89
90 if (hlist_empty(&pf_handlers))
91 return 0;
92
93 rcu_read_lock();
94 hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
95 ret = cur->handler(regs, error_code, address);
96 if (ret)
97 break;
98 }
99 rcu_read_unlock();
100 return ret;
101#else
102 return 0; 60 return 0;
103#endif
104} 61}
105 62
106static inline int notify_page_fault(struct pt_regs *regs) 63static inline int notify_page_fault(struct pt_regs *regs)
@@ -660,7 +617,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
660 617
661 if (notify_page_fault(regs)) 618 if (notify_page_fault(regs))
662 return; 619 return;
663 if (handle_custom_pf(regs, error_code, address)) 620 if (unlikely(kmmio_fault(regs, address)))
664 return; 621 return;
665 622
666 /* 623 /*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2b2bb3f9b683..e92aa461f4d6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/mmiotrace.h>
15 16
16#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
17#include <asm/e820.h> 18#include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
122{ 123{
123 unsigned long pfn, offset, vaddr; 124 unsigned long pfn, offset, vaddr;
124 resource_size_t last_addr; 125 resource_size_t last_addr;
126 const resource_size_t unaligned_phys_addr = phys_addr;
127 const unsigned long unaligned_size = size;
125 struct vm_struct *area; 128 struct vm_struct *area;
126 unsigned long new_prot_val; 129 unsigned long new_prot_val;
127 pgprot_t prot; 130 pgprot_t prot;
128 int retval; 131 int retval;
132 void __iomem *ret_addr;
129 133
130 /* Don't allow wraparound or zero size */ 134 /* Don't allow wraparound or zero size */
131 last_addr = phys_addr + size - 1; 135 last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
233 return NULL; 237 return NULL;
234 } 238 }
235 239
236 return (void __iomem *) (vaddr + offset); 240 ret_addr = (void __iomem *) (vaddr + offset);
241 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
242
243 return ret_addr;
237} 244}
238 245
239/** 246/**
@@ -325,6 +332,8 @@ void iounmap(volatile void __iomem *addr)
325 addr = (volatile void __iomem *) 332 addr = (volatile void __iomem *)
326 (PAGE_MASK & (unsigned long __force)addr); 333 (PAGE_MASK & (unsigned long __force)addr);
327 334
335 mmiotrace_iounmap(addr);
336
328 /* Use the vm area unlocked, assuming the caller 337 /* Use the vm area unlocked, assuming the caller
329 ensures there isn't another iounmap for the same address 338 ensures there isn't another iounmap for the same address
330 in parallel. Reuse of the virtual address is prevented by 339 in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 000000000000..93d82038af4b
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
1/* Support for MMIO probes.
2 * Benfit many code from kprobes
3 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
4 * 2007 Alexander Eichner
5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */
7
8#include <linux/list.h>
9#include <linux/rculist.h>
10#include <linux/spinlock.h>
11#include <linux/hash.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/uaccess.h>
16#include <linux/ptrace.h>
17#include <linux/preempt.h>
18#include <linux/percpu.h>
19#include <linux/kdebug.h>
20#include <linux/mutex.h>
21#include <linux/io.h>
22#include <asm/cacheflush.h>
23#include <asm/tlbflush.h>
24#include <linux/errno.h>
25#include <asm/debugreg.h>
26#include <linux/mmiotrace.h>
27
28#define KMMIO_PAGE_HASH_BITS 4
29#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
30
31struct kmmio_fault_page {
32 struct list_head list;
33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */
35
36 /*
37 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU).
40 */
41 int count;
42};
43
44struct kmmio_delayed_release {
45 struct rcu_head rcu;
46 struct kmmio_fault_page *release_list;
47};
48
49struct kmmio_context {
50 struct kmmio_fault_page *fpage;
51 struct kmmio_probe *probe;
52 unsigned long saved_flags;
53 unsigned long addr;
54 int active;
55};
56
57static DEFINE_SPINLOCK(kmmio_lock);
58
59/* Protected by kmmio_lock */
60unsigned int kmmio_count;
61
62/* Read-protected by RCU, write-protected by kmmio_lock. */
63static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
64static LIST_HEAD(kmmio_probes);
65
66static struct list_head *kmmio_page_list(unsigned long page)
67{
68 return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
69}
70
71/* Accessed per-cpu */
72static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
73
74/*
75 * this is basically a dynamic stabbing problem:
76 * Could use the existing prio tree code or
77 * Possible better implementations:
78 * The Interval Skip List: A Data Structure for Finding All Intervals That
79 * Overlap a Point (might be simple)
80 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
81 */
82/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
83static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
84{
85 struct kmmio_probe *p;
86 list_for_each_entry_rcu(p, &kmmio_probes, list) {
87 if (addr >= p->addr && addr <= (p->addr + p->len))
88 return p;
89 }
90 return NULL;
91}
92
93/* You must be holding RCU read lock. */
94static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
95{
96 struct list_head *head;
97 struct kmmio_fault_page *p;
98
99 page &= PAGE_MASK;
100 head = kmmio_page_list(page);
101 list_for_each_entry_rcu(p, head, list) {
102 if (p->page == page)
103 return p;
104 }
105 return NULL;
106}
107
108static void set_page_present(unsigned long addr, bool present,
109 unsigned int *pglevel)
110{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level);
116
117 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return;
120 }
121
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) {
126 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte;
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break;
133
134 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT;
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break;
140
141 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return;
144 }
145
146 __flush_tlb_one(addr);
147}
148
149/** Mark the given page as not present. Access to it will trigger a fault. */
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
151{
152 set_page_present(page & PAGE_MASK, false, pglevel);
153}
154
155/** Mark the given page as present. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
157{
158 set_page_present(page & PAGE_MASK, true, pglevel);
159}
160
161/*
162 * This is being called from do_page_fault().
163 *
164 * We may be in an interrupt or a critical section. Also prefecthing may
165 * trigger a page fault. We may be in the middle of process switch.
166 * We cannot take any locks, because we could be executing especially
167 * within a kmmio critical section.
168 *
169 * Local interrupts are disabled, so preemption cannot happen.
170 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
171 */
172/*
173 * Interrupts are disabled on entry as trap3 is an interrupt gate
174 * and they remain disabled thorough out this function.
175 */
176int kmmio_handler(struct pt_regs *regs, unsigned long addr)
177{
178 struct kmmio_context *ctx;
179 struct kmmio_fault_page *faultpage;
180 int ret = 0; /* default to fault not handled */
181
182 /*
183 * Preemption is now disabled to prevent process switch during
184 * single stepping. We can only handle one active kmmio trace
185 * per cpu, so ensure that we finish it before something else
186 * gets to run. We also hold the RCU read lock over single
187 * stepping to avoid looking up the probe and kmmio_fault_page
188 * again.
189 */
190 preempt_disable();
191 rcu_read_lock();
192
193 faultpage = get_kmmio_fault_page(addr);
194 if (!faultpage) {
195 /*
196 * Either this page fault is not caused by kmmio, or
197 * another CPU just pulled the kmmio probe from under
198 * our feet. The latter case should not be possible.
199 */
200 goto no_kmmio;
201 }
202
203 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) {
207 /*
208 * On SMP we sometimes get recursive probe hits on the
209 * same address. Context is already saved, fall out.
210 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for "
212 "address 0x%08lx.\n",
213 smp_processor_id(), addr);
214 ret = 1;
215 goto no_kmmio_ctx;
216 }
217 /*
218 * Prevent overwriting already in-flight context.
219 * This should not happen, let's hope disarming at least
220 * prevents a panic.
221 */
222 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr);
227 goto no_kmmio_ctx;
228 }
229 ctx->active++;
230
231 ctx->fpage = faultpage;
232 ctx->probe = get_kmmio_probe(addr);
233 ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
234 ctx->addr = addr;
235
236 if (ctx->probe && ctx->probe->pre_handler)
237 ctx->probe->pre_handler(ctx->probe, regs, addr);
238
239 /*
240 * Enable single-stepping and disable interrupts for the faulting
241 * context. Local interrupts must not get enabled during stepping.
242 */
243 regs->flags |= X86_EFLAGS_TF;
244 regs->flags &= ~X86_EFLAGS_IF;
245
246 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL);
248
249 /*
250 * If another cpu accesses the same page while we are stepping,
251 * the access will not be caught. It will simply succeed and the
252 * only downside is we lose the event. If this becomes a problem,
253 * the user should drop to single cpu before tracing.
254 */
255
256 put_cpu_var(kmmio_ctx);
257 return 1; /* fault handled */
258
259no_kmmio_ctx:
260 put_cpu_var(kmmio_ctx);
261no_kmmio:
262 rcu_read_unlock();
263 preempt_enable_no_resched();
264 return ret;
265}
266
267/*
268 * Interrupts are disabled on entry as trap1 is an interrupt gate
269 * and they remain disabled thorough out this function.
270 * This must always get called as the pair to kmmio_handler().
271 */
272static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
273{
274 int ret = 0;
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276
277 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id());
280 goto out;
281 }
282
283 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs);
285
286 arm_kmmio_fault_page(ctx->fpage->page, NULL);
287
288 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags;
290
291 /* These were acquired in kmmio_handler(). */
292 ctx->active--;
293 BUG_ON(ctx->active);
294 rcu_read_unlock();
295 preempt_enable_no_resched();
296
297 /*
298 * if somebody else is singlestepping across a probe point, flags
299 * will have TF set, in which case, continue the remaining processing
300 * of do_debug, as if this is not a probe hit.
301 */
302 if (!(regs->flags & X86_EFLAGS_TF))
303 ret = 1;
304out:
305 put_cpu_var(kmmio_ctx);
306 return ret;
307}
308
309/* You must be holding kmmio_lock. */
310static int add_kmmio_fault_page(unsigned long page)
311{
312 struct kmmio_fault_page *f;
313
314 page &= PAGE_MASK;
315 f = get_kmmio_fault_page(page);
316 if (f) {
317 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL);
319 f->count++;
320 return 0;
321 }
322
323 f = kmalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f)
325 return -1;
326
327 f->count = 1;
328 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330
331 arm_kmmio_fault_page(f->page, NULL);
332
333 return 0;
334}
335
336/* You must be holding kmmio_lock. */
337static void release_kmmio_fault_page(unsigned long page,
338 struct kmmio_fault_page **release_list)
339{
340 struct kmmio_fault_page *f;
341
342 page &= PAGE_MASK;
343 f = get_kmmio_fault_page(page);
344 if (!f)
345 return;
346
347 f->count--;
348 BUG_ON(f->count < 0);
349 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL);
351 f->release_next = *release_list;
352 *release_list = f;
353 }
354}
355
356/*
357 * With page-unaligned ioremaps, one or two armed pages may contain
358 * addresses from outside the intended mapping. Events for these addresses
359 * are currently silently dropped. The events may result only from programming
360 * mistakes by accessing addresses before the beginning or past the end of a
361 * mapping.
362 */
363int register_kmmio_probe(struct kmmio_probe *p)
364{
365 unsigned long flags;
366 int ret = 0;
367 unsigned long size = 0;
368 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
369
370 spin_lock_irqsave(&kmmio_lock, flags);
371 if (get_kmmio_probe(p->addr)) {
372 ret = -EEXIST;
373 goto out;
374 }
375 kmmio_count++;
376 list_add_rcu(&p->list, &kmmio_probes);
377 while (size < size_lim) {
378 if (add_kmmio_fault_page(p->addr + size))
379 pr_err("kmmio: Unable to set page fault.\n");
380 size += PAGE_SIZE;
381 }
382out:
383 spin_unlock_irqrestore(&kmmio_lock, flags);
384 /*
385 * XXX: What should I do here?
386 * Here was a call to global_flush_tlb(), but it does not exist
387 * anymore. It seems it's not needed after all.
388 */
389 return ret;
390}
391EXPORT_SYMBOL(register_kmmio_probe);
392
393static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
394{
395 struct kmmio_delayed_release *dr = container_of(
396 head,
397 struct kmmio_delayed_release,
398 rcu);
399 struct kmmio_fault_page *p = dr->release_list;
400 while (p) {
401 struct kmmio_fault_page *next = p->release_next;
402 BUG_ON(p->count);
403 kfree(p);
404 p = next;
405 }
406 kfree(dr);
407}
408
409static void remove_kmmio_fault_pages(struct rcu_head *head)
410{
411 struct kmmio_delayed_release *dr = container_of(
412 head,
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags;
418 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) {
420 if (!p->count)
421 list_del_rcu(&p->list);
422 else
423 *prevp = p->release_next;
424 prevp = &p->release_next;
425 p = p->release_next;
426 }
427 spin_unlock_irqrestore(&kmmio_lock, flags);
428 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430}
431
432/*
433 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
434 * sure that the callbacks will not be called anymore. Only after that
435 * you may actually release your struct kmmio_probe.
436 *
437 * Unregistering a kmmio fault page has three steps:
438 * 1. release_kmmio_fault_page()
439 * Disarm the page, wait a grace period to let all faults finish.
440 * 2. remove_kmmio_fault_pages()
441 * Remove the pages from kmmio_page_table.
442 * 3. rcu_free_kmmio_fault_pages()
443 * Actally free the kmmio_fault_page structs as with RCU.
444 */
445void unregister_kmmio_probe(struct kmmio_probe *p)
446{
447 unsigned long flags;
448 unsigned long size = 0;
449 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
450 struct kmmio_fault_page *release_list = NULL;
451 struct kmmio_delayed_release *drelease;
452
453 spin_lock_irqsave(&kmmio_lock, flags);
454 while (size < size_lim) {
455 release_kmmio_fault_page(p->addr + size, &release_list);
456 size += PAGE_SIZE;
457 }
458 list_del_rcu(&p->list);
459 kmmio_count--;
460 spin_unlock_irqrestore(&kmmio_lock, flags);
461
462 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
463 if (!drelease) {
464 pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
465 return;
466 }
467 drelease->release_list = release_list;
468
469 /*
470 * This is not really RCU here. We have just disarmed a set of
471 * pages so that they cannot trigger page faults anymore. However,
472 * we cannot remove the pages from kmmio_page_table,
473 * because a probe hit might be in flight on another CPU. The
474 * pages are collected into a list, and they will be removed from
475 * kmmio_page_table when it is certain that no probe hit related to
476 * these pages can be in flight. RCU grace period sounds like a
477 * good choice.
478 *
479 * If we removed the pages too early, kmmio page fault handler might
480 * not find the respective kmmio_fault_page and determine it's not
481 * a kmmio fault, when it actually is. This would lead to madness.
482 */
483 call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
484}
485EXPORT_SYMBOL(unregister_kmmio_probe);
486
487static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
488 void *args)
489{
490 struct die_args *arg = args;
491
492 if (val == DIE_DEBUG && (arg->err & DR_STEP))
493 if (post_kmmio_handler(arg->err, arg->regs) == 1)
494 return NOTIFY_STOP;
495
496 return NOTIFY_DONE;
497}
498
499static struct notifier_block nb_die = {
500 .notifier_call = kmmio_die_notifier
501};
502
503static int __init init_kmmio(void)
504{
505 int i;
506 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
507 INIT_LIST_HEAD(&kmmio_page_table[i]);
508 return register_die_notifier(&nb_die);
509}
510fs_initcall(init_kmmio); /* should be before device_initcall() */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 000000000000..e7397e108beb
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2005
17 * Jeff Muizelaar, 2006, 2007
18 * Pekka Paalanen, 2008 <pq@iki.fi>
19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */
22#define DEBUG 1
23
24#include <linux/module.h>
25#include <linux/debugfs.h>
26#include <linux/uaccess.h>
27#include <linux/io.h>
28#include <linux/version.h>
29#include <linux/kallsyms.h>
30#include <asm/pgtable.h>
31#include <linux/mmiotrace.h>
32#include <asm/e820.h> /* for ISA_START_ADDRESS */
33#include <asm/atomic.h>
34#include <linux/percpu.h>
35#include <linux/cpu.h>
36
37#include "pf_in.h"
38
39#define NAME "mmiotrace: "
40
41struct trap_reason {
42 unsigned long addr;
43 unsigned long ip;
44 enum reason_type type;
45 int active_traces;
46};
47
48struct remap_trace {
49 struct list_head list;
50 struct kmmio_probe probe;
51 resource_size_t phys;
52 unsigned long id;
53};
54
55/* Accessed per-cpu. */
56static DEFINE_PER_CPU(struct trap_reason, pf_reason);
57static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
58
59#if 0 /* XXX: no way gather this info anymore */
60/* Access to this is not per-cpu. */
61static DEFINE_PER_CPU(atomic_t, dropped);
62#endif
63
64static struct dentry *marker_file;
65
66static DEFINE_MUTEX(mmiotrace_mutex);
67static DEFINE_SPINLOCK(trace_lock);
68static atomic_t mmiotrace_enabled;
69static LIST_HEAD(trace_list); /* struct remap_trace */
70
71/*
72 * Locking in this file:
73 * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
74 * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
75 * and trace_lock.
76 * - Routines depending on is_enabled() must take trace_lock.
77 * - trace_list users must hold trace_lock.
78 * - is_enabled() guarantees that mmio_trace_record is allowed.
79 * - pre/post callbacks assume the effect of is_enabled() being true.
80 */
81
82/* module parameters */
83static unsigned long filter_offset;
84static int nommiotrace;
85static int trace_pc;
86
87module_param(filter_offset, ulong, 0);
88module_param(nommiotrace, bool, 0);
89module_param(trace_pc, bool, 0);
90
91MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
92MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
93MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
94
95static bool is_enabled(void)
96{
97 return atomic_read(&mmiotrace_enabled);
98}
99
100#if 0 /* XXX: needs rewrite */
101/*
102 * Write callback for the debugfs entry:
103 * Read a marker and write it to the mmio trace log
104 */
105static ssize_t write_marker(struct file *file, const char __user *buffer,
106 size_t count, loff_t *ppos)
107{
108 char *event = NULL;
109 struct mm_io_header *headp;
110 ssize_t len = (count > 65535) ? 65535 : count;
111
112 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
113 if (!event)
114 return -ENOMEM;
115
116 headp = (struct mm_io_header *)event;
117 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
118 headp->data_len = len;
119
120 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
121 kfree(event);
122 return -EFAULT;
123 }
124
125 spin_lock_irq(&trace_lock);
126#if 0 /* XXX: convert this to use tracing */
127 if (is_enabled())
128 relay_write(chan, event, sizeof(*headp) + len);
129 else
130#endif
131 len = -EINVAL;
132 spin_unlock_irq(&trace_lock);
133 kfree(event);
134 return len;
135}
136#endif
137
138static void print_pte(unsigned long address)
139{
140 unsigned int level;
141 pte_t *pte = lookup_address(address, &level);
142
143 if (!pte) {
144 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
145 __func__, address);
146 return;
147 }
148
149 if (level == PG_LEVEL_2M) {
150 pr_emerg(NAME "4MB pages are not currently supported: "
151 "0x%08lx\n", address);
152 BUG();
153 }
154 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
155 (unsigned long long)pte_val(*pte),
156 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
157}
158
159/*
160 * For some reason the pre/post pairs have been called in an
161 * unmatched order. Report and die.
162 */
163static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
164{
165 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
166 pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
167 "last fault for address: 0x%08lx\n",
168 addr, my_reason->addr);
169 print_pte(addr);
170 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
171 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
172#ifdef __i386__
173 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
174 regs->ax, regs->bx, regs->cx, regs->dx);
175 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
176 regs->si, regs->di, regs->bp, regs->sp);
177#else
178 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
179 regs->ax, regs->cx, regs->dx);
180 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
181 regs->si, regs->di, regs->bp, regs->sp);
182#endif
183 put_cpu_var(pf_reason);
184 BUG();
185}
186
187static void pre(struct kmmio_probe *p, struct pt_regs *regs,
188 unsigned long addr)
189{
190 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
191 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
192 const unsigned long instptr = instruction_pointer(regs);
193 const enum reason_type type = get_ins_type(instptr);
194 struct remap_trace *trace = p->private;
195
196 /* it doesn't make sense to have more than one active trace per cpu */
197 if (my_reason->active_traces)
198 die_kmmio_nesting_error(regs, addr);
199 else
200 my_reason->active_traces++;
201
202 my_reason->type = type;
203 my_reason->addr = addr;
204 my_reason->ip = instptr;
205
206 my_trace->phys = addr - trace->probe.addr + trace->phys;
207 my_trace->map_id = trace->id;
208
209 /*
210 * Only record the program counter when requested.
211 * It may taint clean-room reverse engineering.
212 */
213 if (trace_pc)
214 my_trace->pc = instptr;
215 else
216 my_trace->pc = 0;
217
218 /*
219 * XXX: the timestamp recorded will be *after* the tracing has been
220 * done, not at the time we hit the instruction. SMP implications
221 * on event ordering?
222 */
223
224 switch (type) {
225 case REG_READ:
226 my_trace->opcode = MMIO_READ;
227 my_trace->width = get_ins_mem_width(instptr);
228 break;
229 case REG_WRITE:
230 my_trace->opcode = MMIO_WRITE;
231 my_trace->width = get_ins_mem_width(instptr);
232 my_trace->value = get_ins_reg_val(instptr, regs);
233 break;
234 case IMM_WRITE:
235 my_trace->opcode = MMIO_WRITE;
236 my_trace->width = get_ins_mem_width(instptr);
237 my_trace->value = get_ins_imm_val(instptr);
238 break;
239 default:
240 {
241 unsigned char *ip = (unsigned char *)instptr;
242 my_trace->opcode = MMIO_UNKNOWN_OP;
243 my_trace->width = 0;
244 my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
245 *(ip + 2);
246 }
247 }
248 put_cpu_var(cpu_trace);
249 put_cpu_var(pf_reason);
250}
251
252static void post(struct kmmio_probe *p, unsigned long condition,
253 struct pt_regs *regs)
254{
255 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
256 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
257
258 /* this should always return the active_trace count to 0 */
259 my_reason->active_traces--;
260 if (my_reason->active_traces) {
261 pr_emerg(NAME "unexpected post handler");
262 BUG();
263 }
264
265 switch (my_reason->type) {
266 case REG_READ:
267 my_trace->value = get_ins_reg_val(my_reason->ip, regs);
268 break;
269 default:
270 break;
271 }
272
273 mmio_trace_rw(my_trace);
274 put_cpu_var(cpu_trace);
275 put_cpu_var(pf_reason);
276}
277
278static void ioremap_trace_core(resource_size_t offset, unsigned long size,
279 void __iomem *addr)
280{
281 static atomic_t next_id;
282 struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
283 /* These are page-unaligned. */
284 struct mmiotrace_map map = {
285 .phys = offset,
286 .virt = (unsigned long)addr,
287 .len = size,
288 .opcode = MMIO_PROBE
289 };
290
291 if (!trace) {
292 pr_err(NAME "kmalloc failed in ioremap\n");
293 return;
294 }
295
296 *trace = (struct remap_trace) {
297 .probe = {
298 .addr = (unsigned long)addr,
299 .len = size,
300 .pre_handler = pre,
301 .post_handler = post,
302 .private = trace
303 },
304 .phys = offset,
305 .id = atomic_inc_return(&next_id)
306 };
307 map.map_id = trace->id;
308
309 spin_lock_irq(&trace_lock);
310 if (!is_enabled())
311 goto not_enabled;
312
313 mmio_trace_mapping(&map);
314 list_add_tail(&trace->list, &trace_list);
315 if (!nommiotrace)
316 register_kmmio_probe(&trace->probe);
317
318not_enabled:
319 spin_unlock_irq(&trace_lock);
320}
321
322void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
323 void __iomem *addr)
324{
325 if (!is_enabled()) /* recheck and proper locking in *_core() */
326 return;
327
328 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
329 (unsigned long long)offset, size, addr);
330 if ((filter_offset) && (offset != filter_offset))
331 return;
332 ioremap_trace_core(offset, size, addr);
333}
334
335static void iounmap_trace_core(volatile void __iomem *addr)
336{
337 struct mmiotrace_map map = {
338 .phys = 0,
339 .virt = (unsigned long)addr,
340 .len = 0,
341 .opcode = MMIO_UNPROBE
342 };
343 struct remap_trace *trace;
344 struct remap_trace *tmp;
345 struct remap_trace *found_trace = NULL;
346
347 pr_debug(NAME "Unmapping %p.\n", addr);
348
349 spin_lock_irq(&trace_lock);
350 if (!is_enabled())
351 goto not_enabled;
352
353 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
354 if ((unsigned long)addr == trace->probe.addr) {
355 if (!nommiotrace)
356 unregister_kmmio_probe(&trace->probe);
357 list_del(&trace->list);
358 found_trace = trace;
359 break;
360 }
361 }
362 map.map_id = (found_trace) ? found_trace->id : -1;
363 mmio_trace_mapping(&map);
364
365not_enabled:
366 spin_unlock_irq(&trace_lock);
367 if (found_trace) {
368 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
369 kfree(found_trace);
370 }
371}
372
373void mmiotrace_iounmap(volatile void __iomem *addr)
374{
375 might_sleep();
376 if (is_enabled()) /* recheck and proper locking in *_core() */
377 iounmap_trace_core(addr);
378}
379
380static void clear_trace_list(void)
381{
382 struct remap_trace *trace;
383 struct remap_trace *tmp;
384
385 /*
386 * No locking required, because the caller ensures we are in a
387 * critical section via mutex, and is_enabled() is false,
388 * i.e. nothing can traverse or modify this list.
389 * Caller also ensures is_enabled() cannot change.
390 */
391 list_for_each_entry(trace, &trace_list, list) {
392 pr_notice(NAME "purging non-iounmapped "
393 "trace @0x%08lx, size 0x%lx.\n",
394 trace->probe.addr, trace->probe.len);
395 if (!nommiotrace)
396 unregister_kmmio_probe(&trace->probe);
397 }
398 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
399
400 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
401 list_del(&trace->list);
402 kfree(trace);
403 }
404}
405
406#ifdef CONFIG_HOTPLUG_CPU
407static cpumask_t downed_cpus;
408
409static void enter_uniprocessor(void)
410{
411 int cpu;
412 int err;
413
414 get_online_cpus();
415 downed_cpus = cpu_online_map;
416 cpu_clear(first_cpu(cpu_online_map), downed_cpus);
417 if (num_online_cpus() > 1)
418 pr_notice(NAME "Disabling non-boot CPUs...\n");
419 put_online_cpus();
420
421 for_each_cpu_mask(cpu, downed_cpus) {
422 err = cpu_down(cpu);
423 if (!err)
424 pr_info(NAME "CPU%d is down.\n", cpu);
425 else
426 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
427 }
428 if (num_online_cpus() > 1)
429 pr_warning(NAME "multiple CPUs still online, "
430 "may miss events.\n");
431}
432
433static void leave_uniprocessor(void)
434{
435 int cpu;
436 int err;
437
438 if (cpus_weight(downed_cpus) == 0)
439 return;
440 pr_notice(NAME "Re-enabling CPUs...\n");
441 for_each_cpu_mask(cpu, downed_cpus) {
442 err = cpu_up(cpu);
443 if (!err)
444 pr_info(NAME "enabled CPU%d.\n", cpu);
445 else
446 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
447 }
448}
449
450#else /* !CONFIG_HOTPLUG_CPU */
451static void enter_uniprocessor(void)
452{
453 if (num_online_cpus() > 1)
454 pr_warning(NAME "multiple CPUs are online, may miss events. "
455 "Suggest booting with maxcpus=1 kernel argument.\n");
456}
457
458static void leave_uniprocessor(void)
459{
460}
461#endif
462
463#if 0 /* XXX: out of order */
464static struct file_operations fops_marker = {
465 .owner = THIS_MODULE,
466 .write = write_marker
467};
468#endif
469
470void enable_mmiotrace(void)
471{
472 mutex_lock(&mmiotrace_mutex);
473 if (is_enabled())
474 goto out;
475
476#if 0 /* XXX: tracing does not support text entries */
477 marker_file = debugfs_create_file("marker", 0660, dir, NULL,
478 &fops_marker);
479 if (!marker_file)
480 pr_err(NAME "marker file creation failed.\n");
481#endif
482
483 if (nommiotrace)
484 pr_info(NAME "MMIO tracing disabled.\n");
485 enter_uniprocessor();
486 spin_lock_irq(&trace_lock);
487 atomic_inc(&mmiotrace_enabled);
488 spin_unlock_irq(&trace_lock);
489 pr_info(NAME "enabled.\n");
490out:
491 mutex_unlock(&mmiotrace_mutex);
492}
493
494void disable_mmiotrace(void)
495{
496 mutex_lock(&mmiotrace_mutex);
497 if (!is_enabled())
498 goto out;
499
500 spin_lock_irq(&trace_lock);
501 atomic_dec(&mmiotrace_enabled);
502 BUG_ON(is_enabled());
503 spin_unlock_irq(&trace_lock);
504
505 clear_trace_list(); /* guarantees: no more kmmio callbacks */
506 leave_uniprocessor();
507 if (marker_file) {
508 debugfs_remove(marker_file);
509 marker_file = NULL;
510 }
511
512 pr_info(NAME "disabled.\n");
513out:
514 mutex_unlock(&mmiotrace_mutex);
515}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37e..57970f2935c0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -227,6 +227,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
227 227
228 return pte_offset_kernel(pmd, address); 228 return pte_offset_kernel(pmd, address);
229} 229}
230EXPORT_SYMBOL_GPL(lookup_address);
230 231
231/* 232/*
232 * Set the new pmd in all the pgds we know about: 233 * Set the new pmd in all the pgds we know about:
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 000000000000..efa1911e20ca
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
23 * Copyright by Intel Crop., 2002
24 * Louis Zhuang (louis.zhuang@intel.com)
25 *
26 * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
27 */
28
29#include <linux/module.h>
30#include <linux/ptrace.h> /* struct pt_regs */
31#include "pf_in.h"
32
33#ifdef __i386__
34/* IA32 Manual 3, 2-1 */
35static unsigned char prefix_codes[] = {
36 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
37 0x65, 0x2E, 0x3E, 0x66, 0x67
38};
39/* IA32 Manual 3, 3-432*/
40static unsigned int reg_rop[] = {
41 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
42};
43static unsigned int reg_wop[] = { 0x88, 0x89 };
44static unsigned int imm_wop[] = { 0xC6, 0xC7 };
45/* IA32 Manual 3, 3-432*/
46static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
47static unsigned int rw32[] = {
48 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
49};
50static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
51static unsigned int mw16[] = { 0xB70F, 0xBF0F };
52static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
53static unsigned int mw64[] = {};
54#else /* not __i386__ */
55static unsigned char prefix_codes[] = {
56 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
57 0xF0, 0xF3, 0xF2,
58 /* REX Prefixes */
59 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
60 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
61};
62/* AMD64 Manual 3, Appendix A*/
63static unsigned int reg_rop[] = {
64 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
65};
66static unsigned int reg_wop[] = { 0x88, 0x89 };
67static unsigned int imm_wop[] = { 0xC6, 0xC7 };
68static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
69static unsigned int rw32[] = {
70 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
71};
72/* 8 bit only */
73static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
74/* 16 bit only */
75static unsigned int mw16[] = { 0xB70F, 0xBF0F };
76/* 16 or 32 bit */
77static unsigned int mw32[] = { 0xC7 };
78/* 16, 32 or 64 bit */
79static unsigned int mw64[] = { 0x89, 0x8B };
80#endif /* not __i386__ */
81
82static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
83 int *rexr)
84{
85 int i;
86 unsigned char *p = addr;
87 *shorted = 0;
88 *enlarged = 0;
89 *rexr = 0;
90
91restart:
92 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
93 if (*p == prefix_codes[i]) {
94 if (*p == 0x66)
95 *shorted = 1;
96#ifdef __amd64__
97 if ((*p & 0xf8) == 0x48)
98 *enlarged = 1;
99 if ((*p & 0xf4) == 0x44)
100 *rexr = 1;
101#endif
102 p++;
103 goto restart;
104 }
105 }
106
107 return (p - addr);
108}
109
110static int get_opcode(unsigned char *addr, unsigned int *opcode)
111{
112 int len;
113
114 if (*addr == 0x0F) {
115 /* 0x0F is extension instruction */
116 *opcode = *(unsigned short *)addr;
117 len = 2;
118 } else {
119 *opcode = *addr;
120 len = 1;
121 }
122
123 return len;
124}
125
126#define CHECK_OP_TYPE(opcode, array, type) \
127 for (i = 0; i < ARRAY_SIZE(array); i++) { \
128 if (array[i] == opcode) { \
129 rv = type; \
130 goto exit; \
131 } \
132 }
133
134enum reason_type get_ins_type(unsigned long ins_addr)
135{
136 unsigned int opcode;
137 unsigned char *p;
138 int shorted, enlarged, rexr;
139 int i;
140 enum reason_type rv = OTHERS;
141
142 p = (unsigned char *)ins_addr;
143 p += skip_prefix(p, &shorted, &enlarged, &rexr);
144 p += get_opcode(p, &opcode);
145
146 CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
147 CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
148 CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
149
150exit:
151 return rv;
152}
153#undef CHECK_OP_TYPE
154
155static unsigned int get_ins_reg_width(unsigned long ins_addr)
156{
157 unsigned int opcode;
158 unsigned char *p;
159 int i, shorted, enlarged, rexr;
160
161 p = (unsigned char *)ins_addr;
162 p += skip_prefix(p, &shorted, &enlarged, &rexr);
163 p += get_opcode(p, &opcode);
164
165 for (i = 0; i < ARRAY_SIZE(rw8); i++)
166 if (rw8[i] == opcode)
167 return 1;
168
169 for (i = 0; i < ARRAY_SIZE(rw32); i++)
170 if (rw32[i] == opcode)
171 return (shorted ? 2 : (enlarged ? 8 : 4));
172
173 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
174 return 0;
175}
176
177unsigned int get_ins_mem_width(unsigned long ins_addr)
178{
179 unsigned int opcode;
180 unsigned char *p;
181 int i, shorted, enlarged, rexr;
182
183 p = (unsigned char *)ins_addr;
184 p += skip_prefix(p, &shorted, &enlarged, &rexr);
185 p += get_opcode(p, &opcode);
186
187 for (i = 0; i < ARRAY_SIZE(mw8); i++)
188 if (mw8[i] == opcode)
189 return 1;
190
191 for (i = 0; i < ARRAY_SIZE(mw16); i++)
192 if (mw16[i] == opcode)
193 return 2;
194
195 for (i = 0; i < ARRAY_SIZE(mw32); i++)
196 if (mw32[i] == opcode)
197 return shorted ? 2 : 4;
198
199 for (i = 0; i < ARRAY_SIZE(mw64); i++)
200 if (mw64[i] == opcode)
201 return shorted ? 2 : (enlarged ? 8 : 4);
202
203 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
204 return 0;
205}
206
207/*
208 * Define register ident in mod/rm byte.
209 * Note: these are NOT the same as in ptrace-abi.h.
210 */
211enum {
212 arg_AL = 0,
213 arg_CL = 1,
214 arg_DL = 2,
215 arg_BL = 3,
216 arg_AH = 4,
217 arg_CH = 5,
218 arg_DH = 6,
219 arg_BH = 7,
220
221 arg_AX = 0,
222 arg_CX = 1,
223 arg_DX = 2,
224 arg_BX = 3,
225 arg_SP = 4,
226 arg_BP = 5,
227 arg_SI = 6,
228 arg_DI = 7,
229#ifdef __amd64__
230 arg_R8 = 8,
231 arg_R9 = 9,
232 arg_R10 = 10,
233 arg_R11 = 11,
234 arg_R12 = 12,
235 arg_R13 = 13,
236 arg_R14 = 14,
237 arg_R15 = 15
238#endif
239};
240
241static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
242{
243 unsigned char *rv = NULL;
244
245 switch (no) {
246 case arg_AL:
247 rv = (unsigned char *)&regs->ax;
248 break;
249 case arg_BL:
250 rv = (unsigned char *)&regs->bx;
251 break;
252 case arg_CL:
253 rv = (unsigned char *)&regs->cx;
254 break;
255 case arg_DL:
256 rv = (unsigned char *)&regs->dx;
257 break;
258 case arg_AH:
259 rv = 1 + (unsigned char *)&regs->ax;
260 break;
261 case arg_BH:
262 rv = 1 + (unsigned char *)&regs->bx;
263 break;
264 case arg_CH:
265 rv = 1 + (unsigned char *)&regs->cx;
266 break;
267 case arg_DH:
268 rv = 1 + (unsigned char *)&regs->dx;
269 break;
270#ifdef __amd64__
271 case arg_R8:
272 rv = (unsigned char *)&regs->r8;
273 break;
274 case arg_R9:
275 rv = (unsigned char *)&regs->r9;
276 break;
277 case arg_R10:
278 rv = (unsigned char *)&regs->r10;
279 break;
280 case arg_R11:
281 rv = (unsigned char *)&regs->r11;
282 break;
283 case arg_R12:
284 rv = (unsigned char *)&regs->r12;
285 break;
286 case arg_R13:
287 rv = (unsigned char *)&regs->r13;
288 break;
289 case arg_R14:
290 rv = (unsigned char *)&regs->r14;
291 break;
292 case arg_R15:
293 rv = (unsigned char *)&regs->r15;
294 break;
295#endif
296 default:
297 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
298 break;
299 }
300 return rv;
301}
302
303static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
304{
305 unsigned long *rv = NULL;
306
307 switch (no) {
308 case arg_AX:
309 rv = &regs->ax;
310 break;
311 case arg_BX:
312 rv = &regs->bx;
313 break;
314 case arg_CX:
315 rv = &regs->cx;
316 break;
317 case arg_DX:
318 rv = &regs->dx;
319 break;
320 case arg_SP:
321 rv = &regs->sp;
322 break;
323 case arg_BP:
324 rv = &regs->bp;
325 break;
326 case arg_SI:
327 rv = &regs->si;
328 break;
329 case arg_DI:
330 rv = &regs->di;
331 break;
332#ifdef __amd64__
333 case arg_R8:
334 rv = &regs->r8;
335 break;
336 case arg_R9:
337 rv = &regs->r9;
338 break;
339 case arg_R10:
340 rv = &regs->r10;
341 break;
342 case arg_R11:
343 rv = &regs->r11;
344 break;
345 case arg_R12:
346 rv = &regs->r12;
347 break;
348 case arg_R13:
349 rv = &regs->r13;
350 break;
351 case arg_R14:
352 rv = &regs->r14;
353 break;
354 case arg_R15:
355 rv = &regs->r15;
356 break;
357#endif
358 default:
359 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
360 }
361
362 return rv;
363}
364
365unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
366{
367 unsigned int opcode;
368 unsigned char mod_rm;
369 int reg;
370 unsigned char *p;
371 int i, shorted, enlarged, rexr;
372 unsigned long rv;
373
374 p = (unsigned char *)ins_addr;
375 p += skip_prefix(p, &shorted, &enlarged, &rexr);
376 p += get_opcode(p, &opcode);
377 for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
378 if (reg_rop[i] == opcode) {
379 rv = REG_READ;
380 goto do_work;
381 }
382
383 for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
384 if (reg_wop[i] == opcode) {
385 rv = REG_WRITE;
386 goto do_work;
387 }
388
389 printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
390 "0x%02x\n", opcode);
391 goto err;
392
393do_work:
394 mod_rm = *p;
395 reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
396 switch (get_ins_reg_width(ins_addr)) {
397 case 1:
398 return *get_reg_w8(reg, regs);
399
400 case 2:
401 return *(unsigned short *)get_reg_w32(reg, regs);
402
403 case 4:
404 return *(unsigned int *)get_reg_w32(reg, regs);
405
406#ifdef __amd64__
407 case 8:
408 return *(unsigned long *)get_reg_w32(reg, regs);
409#endif
410
411 default:
412 printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
413 }
414
415err:
416 return 0;
417}
418
419unsigned long get_ins_imm_val(unsigned long ins_addr)
420{
421 unsigned int opcode;
422 unsigned char mod_rm;
423 unsigned char mod;
424 unsigned char *p;
425 int i, shorted, enlarged, rexr;
426 unsigned long rv;
427
428 p = (unsigned char *)ins_addr;
429 p += skip_prefix(p, &shorted, &enlarged, &rexr);
430 p += get_opcode(p, &opcode);
431 for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
432 if (imm_wop[i] == opcode) {
433 rv = IMM_WRITE;
434 goto do_work;
435 }
436
437 printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
438 "0x%02x\n", opcode);
439 goto err;
440
441do_work:
442 mod_rm = *p;
443 mod = mod_rm >> 6;
444 p++;
445 switch (mod) {
446 case 0:
447 /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */
448 /* AMD64: XXX Check for address size prefix? */
449 if ((mod_rm & 0x7) == 0x5)
450 p += 4;
451 break;
452
453 case 1:
454 p += 1;
455 break;
456
457 case 2:
458 p += 4;
459 break;
460
461 case 3:
462 default:
463 printk(KERN_ERR "mmiotrace: not a memory access instruction "
464 "at 0x%lx, rm_mod=0x%02x\n",
465 ins_addr, mod_rm);
466 }
467
468 switch (get_ins_reg_width(ins_addr)) {
469 case 1:
470 return *(unsigned char *)p;
471
472 case 2:
473 return *(unsigned short *)p;
474
475 case 4:
476 return *(unsigned int *)p;
477
478#ifdef __amd64__
479 case 8:
480 return *(unsigned long *)p;
481#endif
482
483 default:
484 printk(KERN_ERR "mmiotrace: Error: width.\n");
485 }
486
487err:
488 return 0;
489}
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 000000000000..e05341a51a27
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
1/*
2 * Fault Injection Test harness (FI)
3 * Copyright (C) Intel Crop.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22#ifndef __PF_H_
23#define __PF_H_
24
25enum reason_type {
26 NOT_ME, /* page fault is not in regions */
27 NOTHING, /* access others point in regions */
28 REG_READ, /* read from addr to reg */
29 REG_WRITE, /* write from reg to addr */
30 IMM_WRITE, /* write from imm to addr */
31 OTHERS /* Other instructions can not intercept */
32};
33
34enum reason_type get_ins_type(unsigned long ins_addr);
35unsigned int get_ins_mem_width(unsigned long ins_addr);
36unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
37unsigned long get_ins_imm_val(unsigned long ins_addr);
38
39#endif /* __PF_H_ */
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 000000000000..d877c5b423ef
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi>
3 */
4#include <linux/module.h>
5#include <linux/io.h>
6
7#define MODULE_NAME "testmmiotrace"
8
9static unsigned long mmio_address;
10module_param(mmio_address, ulong, 0);
11MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
12
13static void do_write_test(void __iomem *p)
14{
15 unsigned int i;
16 for (i = 0; i < 256; i++)
17 iowrite8(i, p + i);
18 for (i = 1024; i < (5 * 1024); i += 2)
19 iowrite16(i * 12 + 7, p + i);
20 for (i = (5 * 1024); i < (16 * 1024); i += 4)
21 iowrite32(i * 212371 + 13, p + i);
22}
23
24static void do_read_test(void __iomem *p)
25{
26 unsigned int i;
27 for (i = 0; i < 256; i++)
28 ioread8(p + i);
29 for (i = 1024; i < (5 * 1024); i += 2)
30 ioread16(p + i);
31 for (i = (5 * 1024); i < (16 * 1024); i += 4)
32 ioread32(p + i);
33}
34
35static void do_test(void)
36{
37 void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
38 if (!p) {
39 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
40 return;
41 }
42 do_write_test(p);
43 do_read_test(p);
44 iounmap(p);
45}
46
47static int __init init(void)
48{
49 if (mmio_address == 0) {
50 pr_err(MODULE_NAME ": you have to use the module argument "
51 "mmio_address.\n");
52 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
53 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
54 return -ENXIO;
55 }
56
57 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
58 "in PCI address space, and writing "
59 "rubbish in there.\n", mmio_address);
60 do_test();
61 return 0;
62}
63
64static void __exit cleanup(void)
65{
66 pr_debug(MODULE_NAME ": unloaded.\n");
67}
68
69module_init(init);
70module_exit(cleanup);
71MODULE_LICENSE("GPL");
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index a80f2d6cc737..96651bb59ba1 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,13 +35,4 @@ extern void show_regs(struct pt_regs *regs);
35extern unsigned long oops_begin(void); 35extern unsigned long oops_begin(void);
36extern void oops_end(unsigned long, struct pt_regs *, int signr); 36extern void oops_end(unsigned long, struct pt_regs *, int signr);
37 37
38struct pf_handler {
39 struct hlist_node hlist;
40 int (*handler)(struct pt_regs *regs, unsigned long error_code,
41 unsigned long address);
42};
43
44extern void register_page_fault_handler(struct pf_handler *new_pfh);
45extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
46
47#endif 38#endif
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
new file mode 100644
index 000000000000..61d19e1b7a0b
--- /dev/null
+++ b/include/linux/mmiotrace.h
@@ -0,0 +1,85 @@
1#ifndef MMIOTRACE_H
2#define MMIOTRACE_H
3
4#include <linux/types.h>
5#include <linux/list.h>
6
7struct kmmio_probe;
8struct pt_regs;
9
10typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
11 struct pt_regs *, unsigned long addr);
12typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
13 unsigned long condition, struct pt_regs *);
14
15struct kmmio_probe {
16 struct list_head list; /* kmmio internal list */
17 unsigned long addr; /* start location of the probe point */
18 unsigned long len; /* length of the probe region */
19 kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
20 kmmio_post_handler_t post_handler; /* Called after addr is executed */
21 void *private;
22};
23
24/* kmmio is active by some kmmio_probes? */
25static inline int is_kmmio_active(void)
26{
27 extern unsigned int kmmio_count;
28 return kmmio_count;
29}
30
31extern int register_kmmio_probe(struct kmmio_probe *p);
32extern void unregister_kmmio_probe(struct kmmio_probe *p);
33
34/* Called from page fault handler. */
35extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
36
37/* Called from ioremap.c */
38#ifdef CONFIG_MMIOTRACE
39extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
40 void __iomem *addr);
41extern void mmiotrace_iounmap(volatile void __iomem *addr);
42#else
43static inline void mmiotrace_ioremap(resource_size_t offset,
44 unsigned long size, void __iomem *addr)
45{
46}
47
48static inline void mmiotrace_iounmap(volatile void __iomem *addr)
49{
50}
51#endif /* CONFIG_MMIOTRACE_HOOKS */
52
53enum mm_io_opcode {
54 MMIO_READ = 0x1, /* struct mmiotrace_rw */
55 MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
56 MMIO_PROBE = 0x3, /* struct mmiotrace_map */
57 MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
58 MMIO_MARKER = 0x5, /* raw char data */
59 MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
60};
61
62struct mmiotrace_rw {
63 resource_size_t phys; /* PCI address of register */
64 unsigned long value;
65 unsigned long pc; /* optional program counter */
66 int map_id;
67 unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
68 unsigned char width; /* size of register access in bytes */
69};
70
71struct mmiotrace_map {
72 resource_size_t phys; /* base address in PCI space */
73 unsigned long virt; /* base virtual address */
74 unsigned long len; /* mapping size */
75 int map_id;
76 unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
77};
78
79/* in kernel/trace/trace_mmiotrace.c */
80extern void enable_mmiotrace(void);
81extern void disable_mmiotrace(void);
82extern void mmio_trace_rw(struct mmiotrace_rw *rw);
83extern void mmio_trace_mapping(struct mmiotrace_map *map);
84
85#endif /* MMIOTRACE_H */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d9efbbfa2bdf..c44a7dce9086 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -18,5 +18,6 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
18obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 18obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
19obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 19obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 20obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
21obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
21 22
22libftrace-y := ftrace.o 23libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ade79369bfb..4a875600733b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -848,6 +848,48 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
848 trace_function(tr, data, ip, parent_ip, flags); 848 trace_function(tr, data, ip, parent_ip, flags);
849} 849}
850 850
851#ifdef CONFIG_MMIOTRACE
852void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
853 struct mmiotrace_rw *rw)
854{
855 struct trace_entry *entry;
856 unsigned long irq_flags;
857
858 raw_local_irq_save(irq_flags);
859 __raw_spin_lock(&data->lock);
860
861 entry = tracing_get_trace_entry(tr, data);
862 tracing_generic_entry_update(entry, 0);
863 entry->type = TRACE_MMIO_RW;
864 entry->mmiorw = *rw;
865
866 __raw_spin_unlock(&data->lock);
867 raw_local_irq_restore(irq_flags);
868
869 trace_wake_up();
870}
871
872void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
873 struct mmiotrace_map *map)
874{
875 struct trace_entry *entry;
876 unsigned long irq_flags;
877
878 raw_local_irq_save(irq_flags);
879 __raw_spin_lock(&data->lock);
880
881 entry = tracing_get_trace_entry(tr, data);
882 tracing_generic_entry_update(entry, 0);
883 entry->type = TRACE_MMIO_MAP;
884 entry->mmiomap = *map;
885
886 __raw_spin_unlock(&data->lock);
887 raw_local_irq_restore(irq_flags);
888
889 trace_wake_up();
890}
891#endif
892
851void __trace_stack(struct trace_array *tr, 893void __trace_stack(struct trace_array *tr,
852 struct trace_array_cpu *data, 894 struct trace_array_cpu *data,
853 unsigned long flags, 895 unsigned long flags,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b8bd8800d04..4966e6a964fe 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,6 +5,7 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/mmiotrace.h>
8 9
9enum trace_type { 10enum trace_type {
10 __TRACE_FIRST_TYPE = 0, 11 __TRACE_FIRST_TYPE = 0,
@@ -14,6 +15,8 @@ enum trace_type {
14 TRACE_WAKE, 15 TRACE_WAKE,
15 TRACE_STACK, 16 TRACE_STACK,
16 TRACE_SPECIAL, 17 TRACE_SPECIAL,
18 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP,
17 20
18 __TRACE_LAST_TYPE 21 __TRACE_LAST_TYPE
19}; 22};
@@ -75,6 +78,8 @@ struct trace_entry {
75 struct ctx_switch_entry ctx; 78 struct ctx_switch_entry ctx;
76 struct special_entry special; 79 struct special_entry special;
77 struct stack_entry stack; 80 struct stack_entry stack;
81 struct mmiotrace_rw mmiorw;
82 struct mmiotrace_map mmiomap;
78 }; 83 };
79}; 84};
80 85
@@ -255,6 +260,15 @@ extern unsigned long ftrace_update_tot_cnt;
255extern int DYN_FTRACE_TEST_NAME(void); 260extern int DYN_FTRACE_TEST_NAME(void);
256#endif 261#endif
257 262
263#ifdef CONFIG_MMIOTRACE
264extern void __trace_mmiotrace_rw(struct trace_array *tr,
265 struct trace_array_cpu *data,
266 struct mmiotrace_rw *rw);
267extern void __trace_mmiotrace_map(struct trace_array *tr,
268 struct trace_array_cpu *data,
269 struct mmiotrace_map *map);
270#endif
271
258#ifdef CONFIG_FTRACE_STARTUP_TEST 272#ifdef CONFIG_FTRACE_STARTUP_TEST
259#ifdef CONFIG_FTRACE 273#ifdef CONFIG_FTRACE
260extern int trace_selftest_startup_function(struct tracer *trace, 274extern int trace_selftest_startup_function(struct tracer *trace,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
new file mode 100644
index 000000000000..b13dc19dcbb4
--- /dev/null
+++ b/kernel/trace/trace_mmiotrace.c
@@ -0,0 +1,295 @@
1/*
2 * Memory mapped I/O tracing
3 *
4 * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
5 */
6
7#define DEBUG 1
8
9#include <linux/kernel.h>
10#include <linux/mmiotrace.h>
11#include <linux/pci.h>
12
13#include "trace.h"
14
15struct header_iter {
16 struct pci_dev *dev;
17};
18
19static struct trace_array *mmio_trace_array;
20static bool overrun_detected;
21
22static void mmio_reset_data(struct trace_array *tr)
23{
24 int cpu;
25
26 overrun_detected = false;
27 tr->time_start = ftrace_now(tr->cpu);
28
29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]);
31}
32
33static void mmio_trace_init(struct trace_array *tr)
34{
35 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr;
37 if (tr->ctrl) {
38 mmio_reset_data(tr);
39 enable_mmiotrace();
40 }
41}
42
43static void mmio_trace_reset(struct trace_array *tr)
44{
45 pr_debug("in %s\n", __func__);
46 if (tr->ctrl)
47 disable_mmiotrace();
48 mmio_reset_data(tr);
49 mmio_trace_array = NULL;
50}
51
52static void mmio_trace_ctrl_update(struct trace_array *tr)
53{
54 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) {
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61}
62
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
64{
65 int ret = 0;
66 int i;
67 resource_size_t start, end;
68 const struct pci_driver *drv = pci_dev_driver(dev);
69
70 /* XXX: incomplete checks for trace_seq_printf() return value */
71 ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
72 dev->bus->number, dev->devfn,
73 dev->vendor, dev->device, dev->irq);
74 /*
75 * XXX: is pci_resource_to_user() appropriate, since we are
76 * supposed to interpret the __ioremap() phys_addr argument based on
77 * these printed values?
78 */
79 for (i = 0; i < 7; i++) {
80 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
81 ret += trace_seq_printf(s, " %llx",
82 (unsigned long long)(start |
83 (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
84 }
85 for (i = 0; i < 7; i++) {
86 pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
87 ret += trace_seq_printf(s, " %llx",
88 dev->resource[i].start < dev->resource[i].end ?
89 (unsigned long long)(end - start) + 1 : 0);
90 }
91 if (drv)
92 ret += trace_seq_printf(s, " %s\n", drv->name);
93 else
94 ret += trace_seq_printf(s, " \n");
95 return ret;
96}
97
98static void destroy_header_iter(struct header_iter *hiter)
99{
100 if (!hiter)
101 return;
102 pci_dev_put(hiter->dev);
103 kfree(hiter);
104}
105
106static void mmio_pipe_open(struct trace_iterator *iter)
107{
108 struct header_iter *hiter;
109 struct trace_seq *s = &iter->seq;
110
111 trace_seq_printf(s, "VERSION 20070824\n");
112
113 hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
114 if (!hiter)
115 return;
116
117 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
118 iter->private = hiter;
119}
120
121/* XXX: This is not called when the pipe is closed! */
122static void mmio_close(struct trace_iterator *iter)
123{
124 struct header_iter *hiter = iter->private;
125 destroy_header_iter(hiter);
126 iter->private = NULL;
127}
128
129static unsigned long count_overruns(struct trace_iterator *iter)
130{
131 int cpu;
132 unsigned long cnt = 0;
133 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0;
136 }
137 return cnt;
138}
139
140static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
141 char __user *ubuf, size_t cnt, loff_t *ppos)
142{
143 ssize_t ret;
144 struct header_iter *hiter = iter->private;
145 struct trace_seq *s = &iter->seq;
146 unsigned long n;
147
148 n = count_overruns(iter);
149 if (n) {
150 /* XXX: This is later than where events were lost. */
151 trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
152 if (!overrun_detected)
153 pr_warning("mmiotrace has lost events.\n");
154 overrun_detected = true;
155 goto print_out;
156 }
157
158 if (!hiter)
159 return 0;
160
161 mmio_print_pcidev(s, hiter->dev);
162 hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
163
164 if (!hiter->dev) {
165 destroy_header_iter(hiter);
166 iter->private = NULL;
167 }
168
169print_out:
170 ret = trace_seq_to_user(s, ubuf, cnt);
171 return (ret == -EBUSY) ? 0 : ret;
172}
173
174static int mmio_print_rw(struct trace_iterator *iter)
175{
176 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw;
178 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t);
180 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t;
182 int ret = 1;
183
184 switch (entry->mmiorw.opcode) {
185 case MMIO_READ:
186 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
188 rw->width, secs, usec_rem, rw->map_id,
189 (unsigned long long)rw->phys,
190 rw->value, rw->pc, 0);
191 break;
192 case MMIO_WRITE:
193 ret = trace_seq_printf(s,
194 "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
195 rw->width, secs, usec_rem, rw->map_id,
196 (unsigned long long)rw->phys,
197 rw->value, rw->pc, 0);
198 break;
199 case MMIO_UNKNOWN_OP:
200 ret = trace_seq_printf(s,
201 "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
202 secs, usec_rem, rw->map_id,
203 (unsigned long long)rw->phys,
204 (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
205 (rw->value >> 0) & 0xff, rw->pc, 0);
206 break;
207 default:
208 ret = trace_seq_printf(s, "rw what?\n");
209 break;
210 }
211 if (ret)
212 return 1;
213 return 0;
214}
215
216static int mmio_print_map(struct trace_iterator *iter)
217{
218 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap;
220 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t);
222 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t;
224 int ret = 1;
225
226 switch (entry->mmiorw.opcode) {
227 case MMIO_PROBE:
228 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
230 secs, usec_rem, m->map_id,
231 (unsigned long long)m->phys, m->virt, m->len,
232 0UL, 0);
233 break;
234 case MMIO_UNPROBE:
235 ret = trace_seq_printf(s,
236 "UNMAP %lu.%06lu %d 0x%lx %d\n",
237 secs, usec_rem, m->map_id, 0UL, 0);
238 break;
239 default:
240 ret = trace_seq_printf(s, "map what?\n");
241 break;
242 }
243 if (ret)
244 return 1;
245 return 0;
246}
247
248/* return 0 to abort printing without consuming current entry in pipe mode */
249static int mmio_print_line(struct trace_iterator *iter)
250{
251 switch (iter->ent->type) {
252 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter);
256 default:
257 return 1; /* ignore unknown entries */
258 }
259}
260
261static struct tracer mmio_tracer __read_mostly =
262{
263 .name = "mmiotrace",
264 .init = mmio_trace_init,
265 .reset = mmio_trace_reset,
266 .pipe_open = mmio_pipe_open,
267 .close = mmio_close,
268 .read = mmio_read,
269 .ctrl_update = mmio_trace_ctrl_update,
270 .print_line = mmio_print_line,
271};
272
273__init static int init_mmio_trace(void)
274{
275 return register_tracer(&mmio_tracer);
276}
277device_initcall(init_mmio_trace);
278
279void mmio_trace_rw(struct mmiotrace_rw *rw)
280{
281 struct trace_array *tr = mmio_trace_array;
282 struct trace_array_cpu *data = tr->data[smp_processor_id()];
283 __trace_mmiotrace_rw(tr, data, rw);
284}
285
286void mmio_trace_mapping(struct mmiotrace_map *map)
287{
288 struct trace_array *tr = mmio_trace_array;
289 struct trace_array_cpu *data;
290
291 preempt_disable();
292 data = tr->data[smp_processor_id()];
293 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable();
295}