author    Pekka Paalanen <pq@iki.fi>            2008-05-12 15:20:57 -0400
committer Thomas Gleixner <tglx@linutronix.de>  2008-05-24 05:22:12 -0400
commit    0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 (patch)
tree      5f34b3673202303f394c6dd180a15751f50014e9
parent    f513638030ca384b0bace4df64f0b82f6ae1e4c6 (diff)
x86: mmiotrace full patch, preview 1
kmmio.c handles the list of mmio probes with callbacks, the list of traced
pages, and attaching into the page fault handler and die notifier. It arms,
traps and disarms the given pages; this is the core of mmiotrace.

mmio-mod.c is a user interface, hooking into ioremap functions and
registering the mmio probes. It also decodes the required information from
trapped mmio accesses via the pre and post callbacks in each probe.
Currently, hooking into ioremap functions works by redefining the symbols
of the target (binary) kernel module, so that it calls the traced versions
of the functions.

The most notable changes done since the last discussion are:
- kmmio.c is a built-in, not part of the module
- direct call from fault.c to kmmio.c, removing all dynamic hooks
- prepare for unregistering probes at any time
- make kmmio re-initializable and accessible to more than one user
- rewrite kmmio locking to remove all spinlocks from the page fault path

Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe(), or is
there a better way? The function called via call_rcu() itself calls
call_rcu() again; will this work or break? There I need a second RCU grace
period after the first grace period for page faults.

Mmiotrace itself (mmio-mod.c) is still a module; I am going to attack that
next. At some point I will start looking into how to make mmiotrace a
tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make
the user-space part of mmiotracing as simple as
'cat /debug/trace/mmio > dump.txt'.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
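For reference, a minimal usage sketch of the kmmio API this patch exports
through linux/mmiotrace.h (reference_kmmio, register_kmmio_probe and
friends). The handler bodies, the my_* names and the PAGE_SIZE-sized region
are illustrative assumptions, not part of the patch:

    #include <linux/mm.h>
    #include <linux/mmiotrace.h>
    #include <linux/rcupdate.h>

    static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
                       unsigned long addr)
    {
            /* Runs with the page disarmed; decode the trapped access here. */
    }

    static void my_post(struct kmmio_probe *p, unsigned long condition,
                        struct pt_regs *regs)
    {
            /* Runs after the trapped instruction has been single-stepped. */
    }

    static struct kmmio_probe my_probe = {
            .len = PAGE_SIZE,           /* trace one page */
            .pre_handler = my_pre,
            .post_handler = my_post,
    };

    static int my_trace_region(unsigned long addr)
    {
            reference_kmmio();          /* must precede other kmmio calls */
            my_probe.addr = addr;
            return register_kmmio_probe(&my_probe);
    }

    static void my_untrace_region(void)
    {
            unregister_kmmio_probe(&my_probe);
            synchronize_rcu();          /* callbacks may still be in flight */
            unreference_kmmio();
    }

This mirrors what mmio-mod.c does: take a kmmio reference at init, register
one probe per ioremapped region, and tear everything down in reverse order.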
-rw-r--r--  arch/x86/kernel/init_task.c                  1
-rw-r--r--  arch/x86/kernel/mmiotrace/Makefile           8
-rw-r--r--  arch/x86/kernel/mmiotrace/kmmio.c          349
-rw-r--r--  arch/x86/kernel/mmiotrace/kmmio.h           58
-rw-r--r--  arch/x86/kernel/mmiotrace/mmio-mod.c        81
-rw-r--r--  arch/x86/kernel/mmiotrace/pf_in.c            2
-rw-r--r--  arch/x86/kernel/mmiotrace/testmmiotrace.c   13
-rw-r--r--  arch/x86/mm/fault.c                         59
-rw-r--r--  include/asm-x86/kdebug.h                     7
-rw-r--r--  include/linux/mmiotrace.h                   38
10 files changed, 335 insertions, 281 deletions
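The call_rcu() question in the commit message refers to the two-stage
release done in kmmio.c below: the first callback runs after one grace
period, unlinks the objects, and queues a second callback on the same
rcu_head to free them after a second grace period. A generic sketch of
that pattern, with illustrative type and function names that are not part
of the patch:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct delayed_release {
            struct rcu_head rcu;
            void *payload;              /* object readers may still see */
    };

    static void second_stage(struct rcu_head *head)
    {
            struct delayed_release *dr =
                    container_of(head, struct delayed_release, rcu);
            kfree(dr->payload);         /* no reader can reach it anymore */
            kfree(dr);
    }

    static void first_stage(struct rcu_head *head)
    {
            struct delayed_release *dr =
                    container_of(head, struct delayed_release, rcu);
            /* unlink dr->payload from the RCU-protected list here */
            call_rcu(&dr->rcu, second_stage);   /* reuse rcu_head: 2nd GP */
    }

    static void start_release(struct delayed_release *dr)
    {
            call_rcu(&dr->rcu, first_stage);    /* first grace period */
    }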
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 027a5b6a12b2..a4f93b4120c1 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ 17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
18EXPORT_SYMBOL_GPL(init_mm);
19 18
20/* 19/*
21 * Initial thread structure. 20 * Initial thread structure.
diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile
index d6905f7f981b..cf1e747b463e 100644
--- a/arch/x86/kernel/mmiotrace/Makefile
+++ b/arch/x86/kernel/mmiotrace/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 1obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
2mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o 2obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
3 3mmiotrace-objs := pf_in.o mmio-mod.o
4obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 4obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 5e239d0b8467..539a9b19588f 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/version.h> 8#include <linux/version.h>
9#include <linux/list.h>
9#include <linux/spinlock.h> 10#include <linux/spinlock.h>
10#include <linux/hash.h> 11#include <linux/hash.h>
11#include <linux/init.h> 12#include <linux/init.h>
@@ -17,70 +18,119 @@
17#include <linux/ptrace.h> 18#include <linux/ptrace.h>
18#include <linux/preempt.h> 19#include <linux/preempt.h>
19#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/kdebug.h>
20#include <asm/io.h> 22#include <asm/io.h>
21#include <asm/cacheflush.h> 23#include <asm/cacheflush.h>
22#include <asm/errno.h> 24#include <asm/errno.h>
23#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
24#include <asm/pgtable.h> 26#include <asm/pgtable.h>
25 27
26#include "kmmio.h" 28#include <linux/mmiotrace.h>
27 29
28#define KMMIO_HASH_BITS 6
29#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
30#define KMMIO_PAGE_HASH_BITS 4 30#define KMMIO_PAGE_HASH_BITS 4
31#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) 31#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
32 32
33struct kmmio_fault_page {
34 struct list_head list;
35 struct kmmio_fault_page *release_next;
36 unsigned long page; /* location of the fault page */
37
38 /*
39 * Number of times this page has been registered as a part
40 * of a probe. If zero, page is disarmed and this may be freed.
41 * Used only by writers (RCU).
42 */
43 int count;
44};
45
46struct kmmio_delayed_release {
47 struct rcu_head rcu;
48 struct kmmio_fault_page *release_list;
49};
50
33struct kmmio_context { 51struct kmmio_context {
34 struct kmmio_fault_page *fpage; 52 struct kmmio_fault_page *fpage;
35 struct kmmio_probe *probe; 53 struct kmmio_probe *probe;
36 unsigned long saved_flags; 54 unsigned long saved_flags;
55 unsigned long addr;
37 int active; 56 int active;
38}; 57};
39 58
40static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
41 unsigned long address);
42static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, 59static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
43 void *args); 60 void *args);
44 61
62static DECLARE_MUTEX(kmmio_init_mutex);
45static DEFINE_SPINLOCK(kmmio_lock); 63static DEFINE_SPINLOCK(kmmio_lock);
46 64
47/* These are protected by kmmio_lock */ 65/* These are protected by kmmio_lock */
66static int kmmio_initialized;
48unsigned int kmmio_count; 67unsigned int kmmio_count;
49static unsigned int handler_registered; 68
69/* Read-protected by RCU, write-protected by kmmio_lock. */
50static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; 70static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
51static LIST_HEAD(kmmio_probes); 71static LIST_HEAD(kmmio_probes);
52 72
73static struct list_head *kmmio_page_list(unsigned long page)
74{
75 return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
76}
77
53/* Accessed per-cpu */ 78/* Accessed per-cpu */
54static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); 79static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
55 80
81/* protected by kmmio_init_mutex */
56static struct notifier_block nb_die = { 82static struct notifier_block nb_die = {
57 .notifier_call = kmmio_die_notifier 83 .notifier_call = kmmio_die_notifier
58}; 84};
59 85
60int init_kmmio(void) 86/**
87 * Makes sure kmmio is initialized and usable.
88 * This must be called before any other kmmio function defined here.
89 * May sleep.
90 */
91void reference_kmmio(void)
61{ 92{
62 int i; 93 down(&kmmio_init_mutex);
63 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) 94 spin_lock_irq(&kmmio_lock);
64 INIT_LIST_HEAD(&kmmio_page_table[i]); 95 if (!kmmio_initialized) {
65 96 int i;
66 register_die_notifier(&nb_die); 97 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
67 return 0; 98 INIT_LIST_HEAD(&kmmio_page_table[i]);
99 if (register_die_notifier(&nb_die))
100 BUG();
101 }
102 kmmio_initialized++;
103 spin_unlock_irq(&kmmio_lock);
104 up(&kmmio_init_mutex);
68} 105}
106EXPORT_SYMBOL_GPL(reference_kmmio);
69 107
70void cleanup_kmmio(void) 108/**
109 * Clean up kmmio after use. This must be called for every call to
110 * reference_kmmio(). All probes registered after the corresponding
111 * reference_kmmio() must have been unregistered when calling this.
112 * May sleep.
113 */
114void unreference_kmmio(void)
71{ 115{
72 /* 116 bool unreg = false;
73 * Assume the following have been already cleaned by calling 117
74 * unregister_kmmio_probe() appropriately: 118 down(&kmmio_init_mutex);
75 * kmmio_page_table, kmmio_probes 119 spin_lock_irq(&kmmio_lock);
76 */ 120
77 if (handler_registered) { 121 if (kmmio_initialized == 1) {
78 if (mmiotrace_unregister_pf(&kmmio_page_fault)) 122 BUG_ON(is_kmmio_active());
79 BUG(); 123 unreg = true;
80 synchronize_rcu();
81 } 124 }
82 unregister_die_notifier(&nb_die); 125 kmmio_initialized--;
126 BUG_ON(kmmio_initialized < 0);
127 spin_unlock_irq(&kmmio_lock);
128
129 if (unreg)
130 unregister_die_notifier(&nb_die); /* calls sync_rcu() */
131 up(&kmmio_init_mutex);
83} 132}
133EXPORT_SYMBOL(unreference_kmmio);
84 134
85/* 135/*
86 * this is basically a dynamic stabbing problem: 136 * this is basically a dynamic stabbing problem:
@@ -90,33 +140,33 @@ void cleanup_kmmio(void)
90 * Overlap a Point (might be simple) 140 * Overlap a Point (might be simple)
91 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup 141 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
92 */ 142 */
93/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ 143/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
94static struct kmmio_probe *get_kmmio_probe(unsigned long addr) 144static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
95{ 145{
96 struct kmmio_probe *p; 146 struct kmmio_probe *p;
97 list_for_each_entry(p, &kmmio_probes, list) { 147 list_for_each_entry_rcu(p, &kmmio_probes, list) {
98 if (addr >= p->addr && addr <= (p->addr + p->len)) 148 if (addr >= p->addr && addr <= (p->addr + p->len))
99 return p; 149 return p;
100 } 150 }
101 return NULL; 151 return NULL;
102} 152}
103 153
154/* You must be holding RCU read lock. */
104static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) 155static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105{ 156{
106 struct list_head *head, *tmp; 157 struct list_head *head;
158 struct kmmio_fault_page *p;
107 159
108 page &= PAGE_MASK; 160 page &= PAGE_MASK;
109 head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; 161 head = kmmio_page_list(page);
110 list_for_each(tmp, head) { 162 list_for_each_entry_rcu(p, head, list) {
111 struct kmmio_fault_page *p
112 = list_entry(tmp, struct kmmio_fault_page, list);
113 if (p->page == page) 163 if (p->page == page)
114 return p; 164 return p;
115 } 165 }
116
117 return NULL; 166 return NULL;
118} 167}
119 168
169/** Mark the given page as not present. Access to it will trigger a fault. */
120static void arm_kmmio_fault_page(unsigned long page, int *page_level) 170static void arm_kmmio_fault_page(unsigned long page, int *page_level)
121{ 171{
122 unsigned long address = page & PAGE_MASK; 172 unsigned long address = page & PAGE_MASK;
@@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
124 pte_t *pte = lookup_address(address, &level); 174 pte_t *pte = lookup_address(address, &level);
125 175
126 if (!pte) { 176 if (!pte) {
127 printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", 177 pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
128 __FUNCTION__, page); 178 __func__, page);
129 return; 179 return;
130 } 180 }
131 181
@@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
143 __flush_tlb_one(page); 193 __flush_tlb_one(page);
144} 194}
145 195
196/** Mark the given page as present. */
146static void disarm_kmmio_fault_page(unsigned long page, int *page_level) 197static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
147{ 198{
148 unsigned long address = page & PAGE_MASK; 199 unsigned long address = page & PAGE_MASK;
@@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
150 pte_t *pte = lookup_address(address, &level); 201 pte_t *pte = lookup_address(address, &level);
151 202
152 if (!pte) { 203 if (!pte) {
153 printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", 204 pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
154 __FUNCTION__, page); 205 __func__, page);
155 return; 206 return;
156 } 207 }
157 208
@@ -170,12 +221,24 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
170} 221}
171 222
172/* 223/*
224 * This is being called from do_page_fault().
225 *
226 * We may be in an interrupt or a critical section. Also prefecthing may
227 * trigger a page fault. We may be in the middle of process switch.
228 * We cannot take any locks, because we could be executing especially
229 * within a kmmio critical section.
230 *
231 * Local interrupts are disabled, so preemption cannot happen.
232 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
233 */
234/*
173 * Interrupts are disabled on entry as trap3 is an interrupt gate 235 * Interrupts are disabled on entry as trap3 is an interrupt gate
174 * and they remain disabled thorough out this function. 236 * and they remain disabled thorough out this function.
175 */ 237 */
176static int kmmio_handler(struct pt_regs *regs, unsigned long addr) 238int kmmio_handler(struct pt_regs *regs, unsigned long addr)
177{ 239{
178 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 240 struct kmmio_context *ctx;
241 struct kmmio_fault_page *faultpage;
179 242
180 /* 243 /*
181 * Preemption is now disabled to prevent process switch during 244 * Preemption is now disabled to prevent process switch during
@@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
186 * XXX what if an interrupt occurs between returning from 249 * XXX what if an interrupt occurs between returning from
187 * do_page_fault() and entering the single-step exception handler? 250 * do_page_fault() and entering the single-step exception handler?
188 * And that interrupt triggers a kmmio trap? 251 * And that interrupt triggers a kmmio trap?
252 * XXX If we tracing an interrupt service routine or whatever, is
253 * this enough to keep it on the current cpu?
189 */ 254 */
190 preempt_disable(); 255 preempt_disable();
191 256
192 /* interrupts disabled and CPU-local data => atomicity guaranteed. */ 257 rcu_read_lock();
258 faultpage = get_kmmio_fault_page(addr);
259 if (!faultpage) {
260 /*
261 * Either this page fault is not caused by kmmio, or
262 * another CPU just pulled the kmmio probe from under
263 * our feet. In the latter case all hell breaks loose.
264 */
265 goto no_kmmio;
266 }
267
268 ctx = &get_cpu_var(kmmio_ctx);
193 if (ctx->active) { 269 if (ctx->active) {
194 /* 270 /*
195 * This avoids a deadlock with kmmio_lock. 271 * Prevent overwriting already in-flight context.
196 * If this page fault really was due to kmmio trap, 272 * If this page fault really was due to kmmio trap,
197 * all hell breaks loose. 273 * all hell breaks loose.
198 */ 274 */
199 printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " 275 pr_emerg("kmmio: recursive probe hit on CPU %d, "
200 "for address %lu. Ignoring.\n", 276 "for address 0x%08lx. Ignoring.\n",
201 smp_processor_id(), addr); 277 smp_processor_id(), addr);
202 goto no_kmmio; 278 goto no_kmmio_ctx;
203 } 279 }
204 ctx->active++; 280 ctx->active++;
205 281
206 /* 282 ctx->fpage = faultpage;
207 * Acquire the kmmio lock to prevent changes affecting
208 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
209 * returned pointers.
210 * The lock is released in post_kmmio_handler().
211 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
212 */
213 spin_lock(&kmmio_lock);
214
215 ctx->fpage = get_kmmio_fault_page(addr);
216 if (!ctx->fpage) {
217 /* this page fault is not caused by kmmio */
218 goto no_kmmio_locked;
219 }
220
221 ctx->probe = get_kmmio_probe(addr); 283 ctx->probe = get_kmmio_probe(addr);
222 ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); 284 ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
285 ctx->addr = addr;
223 286
224 if (ctx->probe && ctx->probe->pre_handler) 287 if (ctx->probe && ctx->probe->pre_handler)
225 ctx->probe->pre_handler(ctx->probe, regs, addr); 288 ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
227 regs->flags |= TF_MASK; 290 regs->flags |= TF_MASK;
228 regs->flags &= ~IF_MASK; 291 regs->flags &= ~IF_MASK;
229 292
230 /* We hold lock, now we set present bit in PTE and single step. */ 293 /* Now we set present bit in PTE and single step. */
231 disarm_kmmio_fault_page(ctx->fpage->page, NULL); 294 disarm_kmmio_fault_page(ctx->fpage->page, NULL);
232 295
233 put_cpu_var(kmmio_ctx); 296 put_cpu_var(kmmio_ctx);
297 rcu_read_unlock();
234 return 1; 298 return 1;
235 299
236no_kmmio_locked: 300no_kmmio_ctx:
237 spin_unlock(&kmmio_lock); 301 put_cpu_var(kmmio_ctx);
238 ctx->active--;
239no_kmmio: 302no_kmmio:
303 rcu_read_unlock();
240 preempt_enable_no_resched(); 304 preempt_enable_no_resched();
241 put_cpu_var(kmmio_ctx); 305 return 0; /* page fault not handled by kmmio */
242 /* page fault not handled by kmmio */
243 return 0;
244} 306}
245 307
246/* 308/*
247 * Interrupts are disabled on entry as trap1 is an interrupt gate 309 * Interrupts are disabled on entry as trap1 is an interrupt gate
248 * and they remain disabled thorough out this function. 310 * and they remain disabled thorough out this function.
249 * And we hold kmmio lock. 311 * This must always get called as the pair to kmmio_handler().
250 */ 312 */
251static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) 313static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
252{ 314{
253 int ret = 0; 315 int ret = 0;
316 struct kmmio_probe *probe;
317 struct kmmio_fault_page *faultpage;
254 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 318 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
255 319
256 if (!ctx->active) 320 if (!ctx->active)
257 goto out; 321 goto out;
258 322
323 rcu_read_lock();
324
325 faultpage = get_kmmio_fault_page(ctx->addr);
326 probe = get_kmmio_probe(ctx->addr);
327 if (faultpage != ctx->fpage || probe != ctx->probe) {
328 /*
329 * The trace setup changed after kmmio_handler() and before
330 * running this respective post handler. User does not want
331 * the result anymore.
332 */
333 ctx->probe = NULL;
334 ctx->fpage = NULL;
335 }
336
259 if (ctx->probe && ctx->probe->post_handler) 337 if (ctx->probe && ctx->probe->post_handler)
260 ctx->probe->post_handler(ctx->probe, condition, regs); 338 ctx->probe->post_handler(ctx->probe, condition, regs);
261 339
262 arm_kmmio_fault_page(ctx->fpage->page, NULL); 340 if (ctx->fpage)
341 arm_kmmio_fault_page(ctx->fpage->page, NULL);
263 342
264 regs->flags &= ~TF_MASK; 343 regs->flags &= ~TF_MASK;
265 regs->flags |= ctx->saved_flags; 344 regs->flags |= ctx->saved_flags;
266 345
267 /* These were acquired in kmmio_handler(). */ 346 /* These were acquired in kmmio_handler(). */
268 ctx->active--; 347 ctx->active--;
269 spin_unlock(&kmmio_lock); 348 BUG_ON(ctx->active);
270 preempt_enable_no_resched(); 349 preempt_enable_no_resched();
271 350
272 /* 351 /*
@@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
277 if (!(regs->flags & TF_MASK)) 356 if (!(regs->flags & TF_MASK))
278 ret = 1; 357 ret = 1;
279 358
359 rcu_read_unlock();
280out: 360out:
281 put_cpu_var(kmmio_ctx); 361 put_cpu_var(kmmio_ctx);
282 return ret; 362 return ret;
283} 363}
284 364
365/* You must be holding kmmio_lock. */
285static int add_kmmio_fault_page(unsigned long page) 366static int add_kmmio_fault_page(unsigned long page)
286{ 367{
287 struct kmmio_fault_page *f; 368 struct kmmio_fault_page *f;
@@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page)
289 page &= PAGE_MASK; 370 page &= PAGE_MASK;
290 f = get_kmmio_fault_page(page); 371 f = get_kmmio_fault_page(page);
291 if (f) { 372 if (f) {
373 if (!f->count)
374 arm_kmmio_fault_page(f->page, NULL);
292 f->count++; 375 f->count++;
293 return 0; 376 return 0;
294 } 377 }
@@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page)
299 382
300 f->count = 1; 383 f->count = 1;
301 f->page = page; 384 f->page = page;
302 list_add(&f->list, 385 list_add_rcu(&f->list, kmmio_page_list(f->page));
303 &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
304 386
305 arm_kmmio_fault_page(f->page, NULL); 387 arm_kmmio_fault_page(f->page, NULL);
306 388
307 return 0; 389 return 0;
308} 390}
309 391
310static void release_kmmio_fault_page(unsigned long page) 392/* You must be holding kmmio_lock. */
393static void release_kmmio_fault_page(unsigned long page,
394 struct kmmio_fault_page **release_list)
311{ 395{
312 struct kmmio_fault_page *f; 396 struct kmmio_fault_page *f;
313 397
@@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page)
317 return; 401 return;
318 402
319 f->count--; 403 f->count--;
404 BUG_ON(f->count < 0);
320 if (!f->count) { 405 if (!f->count) {
321 disarm_kmmio_fault_page(f->page, NULL); 406 disarm_kmmio_fault_page(f->page, NULL);
322 list_del(&f->list); 407 f->release_next = *release_list;
408 *release_list = f;
323 } 409 }
324} 410}
325 411
@@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p)
334 ret = -EEXIST; 420 ret = -EEXIST;
335 goto out; 421 goto out;
336 } 422 }
337 list_add(&p->list, &kmmio_probes); 423 list_add_rcu(&p->list, &kmmio_probes);
338 /*printk("adding fault pages...\n");*/
339 while (size < p->len) { 424 while (size < p->len) {
340 if (add_kmmio_fault_page(p->addr + size)) 425 if (add_kmmio_fault_page(p->addr + size))
341 printk(KERN_ERR "mmio: Unable to set page fault.\n"); 426 pr_err("kmmio: Unable to set page fault.\n");
342 size += PAGE_SIZE; 427 size += PAGE_SIZE;
343 } 428 }
344
345 if (!handler_registered) {
346 if (mmiotrace_register_pf(&kmmio_page_fault))
347 printk(KERN_ERR "mmiotrace: Cannot register page "
348 "fault handler.\n");
349 else
350 handler_registered++;
351 }
352
353out: 429out:
354 spin_unlock_irq(&kmmio_lock); 430 spin_unlock_irq(&kmmio_lock);
355 /* 431 /*
356 * XXX: What should I do here? 432 * XXX: What should I do here?
357 * Here was a call to global_flush_tlb(), but it does not exist 433 * Here was a call to global_flush_tlb(), but it does not exist
358 * anymore. 434 * anymore. It seems it's not needed after all.
359 */ 435 */
360 return ret; 436 return ret;
361} 437}
438EXPORT_SYMBOL(register_kmmio_probe);
362 439
440static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
441{
442 struct kmmio_delayed_release *dr = container_of(
443 head,
444 struct kmmio_delayed_release,
445 rcu);
446 struct kmmio_fault_page *p = dr->release_list;
447 while (p) {
448 struct kmmio_fault_page *next = p->release_next;
449 BUG_ON(p->count);
450 kfree(p);
451 p = next;
452 }
453 kfree(dr);
454}
455
456static void remove_kmmio_fault_pages(struct rcu_head *head)
457{
458 struct kmmio_delayed_release *dr = container_of(
459 head,
460 struct kmmio_delayed_release,
461 rcu);
462 struct kmmio_fault_page *p = dr->release_list;
463 struct kmmio_fault_page **prevp = &dr->release_list;
464 unsigned long flags;
465 spin_lock_irqsave(&kmmio_lock, flags);
466 while (p) {
467 if (!p->count)
468 list_del_rcu(&p->list);
469 else
470 *prevp = p->release_next;
471 prevp = &p->release_next;
472 p = p->release_next;
473 }
474 spin_unlock_irqrestore(&kmmio_lock, flags);
475 /* This is the real RCU destroy call. */
476 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
477}
478
479/*
480 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
481 * sure that the callbacks will not be called anymore.
482 *
483 * Unregistering a kmmio fault page has three steps:
484 * 1. release_kmmio_fault_page()
485 * Disarm the page, wait a grace period to let all faults finish.
486 * 2. remove_kmmio_fault_pages()
487 * Remove the pages from kmmio_page_table.
488 * 3. rcu_free_kmmio_fault_pages()
489 * Actally free the kmmio_fault_page structs as with RCU.
490 */
363void unregister_kmmio_probe(struct kmmio_probe *p) 491void unregister_kmmio_probe(struct kmmio_probe *p)
364{ 492{
365 unsigned long size = 0; 493 unsigned long size = 0;
494 struct kmmio_fault_page *release_list = NULL;
495 struct kmmio_delayed_release *drelease;
366 496
367 spin_lock_irq(&kmmio_lock); 497 spin_lock_irq(&kmmio_lock);
368 while (size < p->len) { 498 while (size < p->len) {
369 release_kmmio_fault_page(p->addr + size); 499 release_kmmio_fault_page(p->addr + size, &release_list);
370 size += PAGE_SIZE; 500 size += PAGE_SIZE;
371 } 501 }
372 list_del(&p->list); 502 list_del_rcu(&p->list);
373 kmmio_count--; 503 kmmio_count--;
374 spin_unlock_irq(&kmmio_lock); 504 spin_unlock_irq(&kmmio_lock);
375}
376 505
377/* 506 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
378 * According to 2.6.20, mainly x86_64 arch: 507 if (!drelease) {
379 * This is being called from do_page_fault(), via the page fault notifier 508 pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
380 * chain. The chain is called for both user space faults and kernel space 509 return;
381 * faults (address >= TASK_SIZE64), except not on faults serviced by 510 }
382 * vmalloc_fault(). 511 drelease->release_list = release_list;
383 * 512
384 * We may be in an interrupt or a critical section. Also prefecthing may 513 /*
385 * trigger a page fault. We may be in the middle of process switch. 514 * This is not really RCU here. We have just disarmed a set of
386 * The page fault hook functionality has put us inside RCU read lock. 515 * pages so that they cannot trigger page faults anymore. However,
387 * 516 * we cannot remove the pages from kmmio_page_table,
388 * Local interrupts are disabled, so preemption cannot happen. 517 * because a probe hit might be in flight on another CPU. The
389 * Do not enable interrupts, do not sleep, and watch out for other CPUs. 518 * pages are collected into a list, and they will be removed from
390 */ 519 * kmmio_page_table when it is certain that no probe hit related to
391static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, 520 * these pages can be in flight. RCU grace period sounds like a
392 unsigned long address) 521 * good choice.
393{ 522 *
394 if (is_kmmio_active()) 523 * If we removed the pages too early, kmmio page fault handler might
395 if (kmmio_handler(regs, address) == 1) 524 * not find the respective kmmio_fault_page and determine it's not
396 return -1; 525 * a kmmio fault, when it actually is. This would lead to madness.
397 return 0; 526 */
527 call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
398} 528}
529EXPORT_SYMBOL(unregister_kmmio_probe);
399 530
400static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, 531static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
401 void *args) 532 void *args)
diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h
deleted file mode 100644
index 85b7f68a3b8a..000000000000
--- a/arch/x86/kernel/mmiotrace/kmmio.h
+++ /dev/null
@@ -1,58 +0,0 @@
1#ifndef _LINUX_KMMIO_H
2#define _LINUX_KMMIO_H
3
4#include <linux/list.h>
5#include <linux/notifier.h>
6#include <linux/smp.h>
7#include <linux/types.h>
8#include <linux/ptrace.h>
9#include <linux/version.h>
10#include <linux/kdebug.h>
11
12struct kmmio_probe;
13struct kmmio_fault_page;
14struct pt_regs;
15
16typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
17 struct pt_regs *, unsigned long addr);
18typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
19 unsigned long condition, struct pt_regs *);
20
21struct kmmio_probe {
22 struct list_head list;
23
24 /* start location of the probe point */
25 unsigned long addr;
26
27 /* length of the probe region */
28 unsigned long len;
29
30 /* Called before addr is executed. */
31 kmmio_pre_handler_t pre_handler;
32
33 /* Called after addr is executed, unless... */
34 kmmio_post_handler_t post_handler;
35};
36
37struct kmmio_fault_page {
38 struct list_head list;
39
40 /* location of the fault page */
41 unsigned long page;
42
43 int count;
44};
45
46/* kmmio is active by some kmmio_probes? */
47static inline int is_kmmio_active(void)
48{
49 extern unsigned int kmmio_count;
50 return kmmio_count;
51}
52
53int init_kmmio(void);
54void cleanup_kmmio(void);
55int register_kmmio_probe(struct kmmio_probe *p);
56void unregister_kmmio_probe(struct kmmio_probe *p);
57
58#endif /* _LINUX_KMMIO_H */
diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c
index f9c609266d83..e1a508588f03 100644
--- a/arch/x86/kernel/mmiotrace/mmio-mod.c
+++ b/arch/x86/kernel/mmiotrace/mmio-mod.c
@@ -32,7 +32,6 @@
32#include <asm/atomic.h> 32#include <asm/atomic.h>
33#include <linux/percpu.h> 33#include <linux/percpu.h>
34 34
35#include "kmmio.h"
36#include "pf_in.h" 35#include "pf_in.h"
37 36
38/* This app's relay channel files will appear in /debug/mmio-trace */ 37/* This app's relay channel files will appear in /debug/mmio-trace */
@@ -129,18 +128,17 @@ static void print_pte(unsigned long address)
129 pte_t *pte = lookup_address(address, &level); 128 pte_t *pte = lookup_address(address, &level);
130 129
131 if (!pte) { 130 if (!pte) {
132 printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", 131 pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n",
133 __FUNCTION__, address); 132 __func__, address);
134 return; 133 return;
135 } 134 }
136 135
137 if (level == PG_LEVEL_2M) { 136 if (level == PG_LEVEL_2M) {
138 printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " 137 pr_emerg(MODULE_NAME ": 4MB pages are not currently "
139 "currently supported: %lx\n", 138 "supported: %lx\n", address);
140 address);
141 BUG(); 139 BUG();
142 } 140 }
143 printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", 141 pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
144 address, pte_val(*pte), 142 address, pte_val(*pte),
145 pte_val(*pte) & _PAGE_PRESENT); 143 pte_val(*pte) & _PAGE_PRESENT);
146} 144}
@@ -152,7 +150,7 @@ static void print_pte(unsigned long address)
152static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) 150static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
153{ 151{
154 const struct trap_reason *my_reason = &get_cpu_var(pf_reason); 152 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
155 printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " 153 pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, "
156 "last fault for address: %lx\n", 154 "last fault for address: %lx\n",
157 addr, my_reason->addr); 155 addr, my_reason->addr);
158 print_pte(addr); 156 print_pte(addr);
@@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
160 print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); 158 print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
161 print_symbol(KERN_EMERG "last faulting EIP was at %s\n", 159 print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
162 my_reason->ip); 160 my_reason->ip);
163 printk(KERN_EMERG 161 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
164 "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
165 regs->ax, regs->bx, regs->cx, regs->dx); 162 regs->ax, regs->bx, regs->cx, regs->dx);
166 printk(KERN_EMERG 163 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
167 "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
168 regs->si, regs->di, regs->bp, regs->sp); 164 regs->si, regs->di, regs->bp, regs->sp);
169#else 165#else
170 print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); 166 print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
171 print_symbol(KERN_EMERG "last faulting RIP was at %s\n", 167 print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
172 my_reason->ip); 168 my_reason->ip);
173 printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", 169 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
174 regs->ax, regs->cx, regs->dx); 170 regs->ax, regs->cx, regs->dx);
175 printk(KERN_EMERG "rsi: %016lx rdi: %016lx " 171 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
176 "rbp: %016lx rsp: %016lx\n",
177 regs->si, regs->di, regs->bp, regs->sp); 172 regs->si, regs->di, regs->bp, regs->sp);
178#endif 173#endif
179 put_cpu_var(pf_reason); 174 put_cpu_var(pf_reason);
@@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition,
251 struct trap_reason *my_reason = &get_cpu_var(pf_reason); 246 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
252 struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); 247 struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);
253 248
249 /*
250 * XXX: This might not get called, if the probe is removed while
251 * trace hit is on flight.
252 */
253
254 /* this should always return the active_trace count to 0 */ 254 /* this should always return the active_trace count to 0 */
255 my_reason->active_traces--; 255 my_reason->active_traces--;
256 if (my_reason->active_traces) { 256 if (my_reason->active_traces) {
257 printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); 257 pr_emerg(MODULE_NAME ": unexpected post handler");
258 BUG(); 258 BUG();
259 } 259 }
260 260
@@ -283,16 +283,15 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
283 atomic_t *drop = &per_cpu(dropped, cpu); 283 atomic_t *drop = &per_cpu(dropped, cpu);
284 int count; 284 int count;
285 if (relay_buf_full(buf)) { 285 if (relay_buf_full(buf)) {
286 if (atomic_inc_return(drop) == 1) { 286 if (atomic_inc_return(drop) == 1)
287 printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", 287 pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu);
288 cpu);
289 }
290 return 0; 288 return 0;
291 } else if ((count = atomic_read(drop))) { 289 }
292 printk(KERN_ERR MODULE_NAME 290 count = atomic_read(drop);
293 ": cpu %d buffer no longer full, " 291 if (count) {
294 "missed %d events.\n", 292 pr_err(MODULE_NAME ": cpu %d buffer no longer full, "
295 cpu, count); 293 "missed %d events.\n",
294 cpu, count);
296 atomic_sub(count, drop); 295 atomic_sub(count, drop);
297 } 296 }
298 297
@@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size,
407 /* Don't trace the low PCI/ISA area, it's always mapped.. */ 406 /* Don't trace the low PCI/ISA area, it's always mapped.. */
408 if (!ISA_trace && (offset < ISA_END_ADDRESS) && 407 if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
409 (offset + size > ISA_START_ADDRESS)) { 408 (offset + size > ISA_START_ADDRESS)) {
410 printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " 409 pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area "
411 "PCI/ISA area (0x%lx-0x%lx)\n", 410 "(0x%lx-0x%lx)\n",
412 offset, offset + size); 411 offset, offset + size);
413 return; 412 return;
414 } 413 }
@@ -418,7 +417,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size,
418void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) 417void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
419{ 418{
420 void __iomem *p = ioremap_cache(offset, size); 419 void __iomem *p = ioremap_cache(offset, size);
421 printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", 420 pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
422 offset, size, p); 421 offset, size, p);
423 ioremap_trace_core(offset, size, p); 422 ioremap_trace_core(offset, size, p);
424 return p; 423 return p;
@@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace);
428void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) 427void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
429{ 428{
430 void __iomem *p = ioremap_nocache(offset, size); 429 void __iomem *p = ioremap_nocache(offset, size);
431 printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", 430 pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
432 offset, size, p); 431 offset, size, p);
433 ioremap_trace_core(offset, size, p); 432 ioremap_trace_core(offset, size, p);
434 return p; 433 return p;
@@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr)
455 }; 454 };
456 struct remap_trace *trace; 455 struct remap_trace *trace;
457 struct remap_trace *tmp; 456 struct remap_trace *tmp;
458 printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); 457 pr_debug(MODULE_NAME ": Unmapping %p.\n", addr);
459 record_timestamp(&event.header); 458 record_timestamp(&event.header);
460 459
461 spin_lock(&trace_list_lock); 460 spin_lock(&trace_list_lock);
@@ -481,7 +480,7 @@ static void clear_trace_list(void)
481 480
482 spin_lock(&trace_list_lock); 481 spin_lock(&trace_list_lock);
483 list_for_each_entry_safe(trace, tmp, &trace_list, list) { 482 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
484 printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " 483 pr_warning(MODULE_NAME ": purging non-iounmapped "
485 "trace @0x%08lx, size 0x%lx.\n", 484 "trace @0x%08lx, size 0x%lx.\n",
486 trace->probe.addr, trace->probe.len); 485 trace->probe.addr, trace->probe.len);
487 if (!nommiotrace) 486 if (!nommiotrace)
@@ -500,39 +499,37 @@ static int __init init(void)
500 499
501 dir = debugfs_create_dir(APP_DIR, NULL); 500 dir = debugfs_create_dir(APP_DIR, NULL);
502 if (!dir) { 501 if (!dir) {
503 printk(KERN_ERR MODULE_NAME 502 pr_err(MODULE_NAME ": Couldn't create relay app directory.\n");
504 ": Couldn't create relay app directory.\n");
505 return -ENOMEM; 503 return -ENOMEM;
506 } 504 }
507 505
508 chan = create_channel(subbuf_size, n_subbufs); 506 chan = create_channel(subbuf_size, n_subbufs);
509 if (!chan) { 507 if (!chan) {
510 debugfs_remove(dir); 508 debugfs_remove(dir);
511 printk(KERN_ERR MODULE_NAME 509 pr_err(MODULE_NAME ": relay app channel creation failed\n");
512 ": relay app channel creation failed\n");
513 return -ENOMEM; 510 return -ENOMEM;
514 } 511 }
515 512
516 init_kmmio(); 513 reference_kmmio();
517 514
518 proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); 515 proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
519 if (proc_marker_file) 516 if (proc_marker_file)
520 proc_marker_file->write_proc = write_marker; 517 proc_marker_file->write_proc = write_marker;
521 518
522 printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); 519 pr_debug(MODULE_NAME ": loaded.\n");
523 if (nommiotrace) 520 if (nommiotrace)
524 printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); 521 pr_info(MODULE_NAME ": MMIO tracing disabled.\n");
525 if (ISA_trace) 522 if (ISA_trace)
526 printk(KERN_WARNING MODULE_NAME 523 pr_warning(MODULE_NAME ": Warning! low ISA range will be "
527 ": Warning! low ISA range will be traced.\n"); 524 "traced.\n");
528 return 0; 525 return 0;
529} 526}
530 527
531static void __exit cleanup(void) 528static void __exit cleanup(void)
532{ 529{
533 printk(KERN_DEBUG MODULE_NAME ": unload...\n"); 530 pr_debug(MODULE_NAME ": unload...\n");
534 clear_trace_list(); 531 clear_trace_list();
535 cleanup_kmmio(); 532 unreference_kmmio();
536 remove_proc_entry(MARKER_FILE, NULL); 533 remove_proc_entry(MARKER_FILE, NULL);
537 destroy_channel(); 534 destroy_channel();
538 if (dir) 535 if (dir)
diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c
index 67ea520dde62..efa1911e20ca 100644
--- a/arch/x86/kernel/mmiotrace/pf_in.c
+++ b/arch/x86/kernel/mmiotrace/pf_in.c
@@ -19,7 +19,7 @@
19 * 19 *
20 */ 20 */
21 21
22/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ 22/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
23 * Copyright by Intel Crop., 2002 23 * Copyright by Intel Crop., 2002
24 * Louis Zhuang (louis.zhuang@intel.com) 24 * Louis Zhuang (louis.zhuang@intel.com)
25 * 25 *
diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c
index 40e66b0e6480..5ecff578672b 100644
--- a/arch/x86/kernel/mmiotrace/testmmiotrace.c
+++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c
@@ -41,8 +41,7 @@ static void do_test(void)
41{ 41{
42 void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); 42 void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000);
43 if (!p) { 43 if (!p) {
44 printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " 44 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
45 "aborting.\n");
46 return; 45 return;
47 } 46 }
48 do_write_test(p); 47 do_write_test(p);
@@ -53,14 +52,14 @@ static void do_test(void)
53static int __init init(void) 52static int __init init(void)
54{ 53{
55 if (mmio_address == 0) { 54 if (mmio_address == 0) {
56 printk(KERN_ERR MODULE_NAME ": you have to use the module " 55 pr_err(MODULE_NAME ": you have to use the module argument "
57 "argument mmio_address.\n"); 56 "mmio_address.\n");
58 printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" 57 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
59 " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); 58 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
60 return -ENXIO; 59 return -ENXIO;
61 } 60 }
62 61
63 printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " 62 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
64 "in PCI address space, and writing " 63 "in PCI address space, and writing "
65 "rubbish in there.\n", mmio_address); 64 "rubbish in there.\n", mmio_address);
66 do_test(); 65 do_test();
@@ -69,7 +68,7 @@ static int __init init(void)
69 68
70static void __exit cleanup(void) 69static void __exit cleanup(void)
71{ 70{
72 printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); 71 pr_debug(MODULE_NAME ": unloaded.\n");
73} 72}
74 73
75module_init(init); 74module_init(init);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e9a086a1a9ff..8c828a68d3b6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/mmiotrace.h>
13#include <linux/mman.h> 14#include <linux/mman.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/smp.h> 16#include <linux/smp.h>
@@ -49,60 +50,14 @@
49#define PF_RSVD (1<<3) 50#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 51#define PF_INSTR (1<<4)
51 52
52#ifdef CONFIG_MMIOTRACE_HOOKS 53static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
53static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */
54static DEFINE_SPINLOCK(mmiotrace_handler_lock);
55
56int mmiotrace_register_pf(pf_handler_func new_pfh)
57{
58 int ret = 0;
59 unsigned long flags;
60 spin_lock_irqsave(&mmiotrace_handler_lock, flags);
61 if (mmiotrace_pf_handler)
62 ret = -EBUSY;
63 else
64 mmiotrace_pf_handler = new_pfh;
65 spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
66 return ret;
67}
68EXPORT_SYMBOL_GPL(mmiotrace_register_pf);
69
70/**
71 * mmiotrace_unregister_pf:
72 * The caller must ensure @old_pfh is not in use anymore before freeing it.
73 * This function does not guarantee it. The handler function pointer is
74 * protected by RCU, so you can do this by e.g. calling synchronize_rcu().
75 */
76int mmiotrace_unregister_pf(pf_handler_func old_pfh)
77{
78 int ret = 0;
79 unsigned long flags;
80 spin_lock_irqsave(&mmiotrace_handler_lock, flags);
81 if (mmiotrace_pf_handler != old_pfh)
82 ret = -EPERM;
83 else
84 mmiotrace_pf_handler = NULL;
85 spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
86 return ret;
87}
88EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf);
89#endif /* CONFIG_MMIOTRACE_HOOKS */
90
91/* returns non-zero if do_page_fault() should return */
92static inline int call_mmiotrace(struct pt_regs *regs,
93 unsigned long error_code,
94 unsigned long address)
95{ 54{
96#ifdef CONFIG_MMIOTRACE_HOOKS 55#ifdef CONFIG_MMIOTRACE_HOOKS
97 int ret = 0; 56 if (unlikely(is_kmmio_active()))
98 rcu_read_lock(); 57 if (kmmio_handler(regs, addr) == 1)
99 if (mmiotrace_pf_handler) 58 return -1;
100 ret = mmiotrace_pf_handler(regs, error_code, address);
101 rcu_read_unlock();
102 return ret;
103#else
104 return 0;
105#endif 59#endif
60 return 0;
106} 61}
107 62
108static inline int notify_page_fault(struct pt_regs *regs) 63static inline int notify_page_fault(struct pt_regs *regs)
@@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
657 612
658 if (notify_page_fault(regs)) 613 if (notify_page_fault(regs))
659 return; 614 return;
660 if (call_mmiotrace(regs, error_code, address)) 615 if (unlikely(kmmio_fault(regs, address)))
661 return; 616 return;
662 617
663 /* 618 /*
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 7063281040da..96651bb59ba1 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs);
35extern unsigned long oops_begin(void); 35extern unsigned long oops_begin(void);
36extern void oops_end(unsigned long, struct pt_regs *, int signr); 36extern void oops_end(unsigned long, struct pt_regs *, int signr);
37 37
38typedef int (*pf_handler_func)(struct pt_regs *regs,
39 unsigned long error_code,
40 unsigned long address);
41
42extern int mmiotrace_register_pf(pf_handler_func new_pfh);
43extern int mmiotrace_unregister_pf(pf_handler_func old_pfh);
44
45#endif 38#endif
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 6ec288f1fe24..d87a6cd8b686 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -3,6 +3,44 @@
3 3
4#include <asm/types.h> 4#include <asm/types.h>
5 5
6#ifdef __KERNEL__
7
8#include <linux/list.h>
9
10struct kmmio_probe;
11struct pt_regs;
12
13typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
14 struct pt_regs *, unsigned long addr);
15typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
16 unsigned long condition, struct pt_regs *);
17
18struct kmmio_probe {
19 struct list_head list;
20 unsigned long addr; /* start location of the probe point */
21 unsigned long len; /* length of the probe region */
22 kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
23 kmmio_post_handler_t post_handler; /* Called after addr is executed */
24};
25
26/* kmmio is active by some kmmio_probes? */
27static inline int is_kmmio_active(void)
28{
29 extern unsigned int kmmio_count;
30 return kmmio_count;
31}
32
33extern void reference_kmmio(void);
34extern void unreference_kmmio(void);
35extern int register_kmmio_probe(struct kmmio_probe *p);
36extern void unregister_kmmio_probe(struct kmmio_probe *p);
37
38/* Called from page fault handler. */
39extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
40
41#endif /* __KERNEL__ */
42
43
6/* 44/*
7 * If you change anything here, you must bump MMIO_VERSION. 45 * If you change anything here, you must bump MMIO_VERSION.
8 * This is the relay data format for user space. 46 * This is the relay data format for user space.