author    Pekka Paalanen <pq@iki.fi>    2008-05-12 15:20:57 -0400
committer Thomas Gleixner <tglx@linutronix.de>    2008-05-24 05:22:12 -0400
commit    0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 (patch)
tree      5f34b3673202303f394c6dd180a15751f50014e9 /arch/x86/kernel/mmiotrace/kmmio.c
parent    f513638030ca384b0bace4df64f0b82f6ae1e4c6 (diff)
x86: mmiotrace full patch, preview 1
kmmio.c handles the list of mmio probes with callbacks, the list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages; this is the core of mmiotrace.

mmio-mod.c is a user interface, hooking into the ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into the ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions.

The most notable changes done since the last discussion are:
- kmmio.c is a built-in, not part of the module
- direct call from fault.c to kmmio.c, removing all dynamic hooks
- prepare for unregistering probes at any time
- make kmmio re-initializable and accessible to more than one user
- rewrite kmmio locking to remove all spinlocks from the page fault path

Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe(), or is there a better way? The function called via call_rcu() itself calls call_rcu() again; will this work or break? There I need a second grace period for RCU after the first grace period for page faults.

Mmiotrace itself (mmio-mod.c) is still a module; I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
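For context on the call_rcu() question above, a minimal sketch of the chained-grace-period pattern being asked about follows. The names used here (struct demo_release, demo_unlink_rcu, demo_free_rcu) are hypothetical stand-ins, not code from this patch; the corresponding callbacks in the patch are remove_kmmio_fault_pages() and rcu_free_kmmio_fault_pages().

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_release {
	struct rcu_head rcu;
	void *payload;	/* stand-in for the list of disarmed fault pages */
};

/* Runs after the second grace period: no reader can still reach the payload. */
static void demo_free_rcu(struct rcu_head *head)
{
	struct demo_release *dr = container_of(head, struct demo_release, rcu);

	kfree(dr->payload);
	kfree(dr);
}

/*
 * Runs after the first grace period: unlink the payload from the lookup
 * structure (list_del_rcu() in the real code), then reuse the same
 * rcu_head to wait for one more grace period before freeing.
 */
static void demo_unlink_rcu(struct rcu_head *head)
{
	struct demo_release *dr = container_of(head, struct demo_release, rcu);

	/* ... list_del_rcu() the items here ... */
	call_rcu(&dr->rcu, demo_free_rcu);	/* second deferral */
}

/*
 * The unregister path would start the chain with:
 *	call_rcu(&dr->rcu, demo_unlink_rcu);
 * Re-arming an rcu_head from within its own callback is permitted (the head
 * is no longer queued once the callback runs), so the chained call_rcu()
 * yields two back-to-back grace periods.
 */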
Diffstat (limited to 'arch/x86/kernel/mmiotrace/kmmio.c')
-rw-r--r--    arch/x86/kernel/mmiotrace/kmmio.c    349
1 file changed, 240 insertions, 109 deletions
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 5e239d0b8467..539a9b19588f 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -6,6 +6,7 @@
 */
 
 #include <linux/version.h>
+#include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
 #include <linux/init.h>
@@ -17,70 +18,119 @@
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include "kmmio.h"
+#include <linux/mmiotrace.h>
 
-#define KMMIO_HASH_BITS 6
-#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
 #define KMMIO_PAGE_HASH_BITS 4
 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
 
+struct kmmio_fault_page {
+	struct list_head list;
+	struct kmmio_fault_page *release_next;
+	unsigned long page; /* location of the fault page */
+
+	/*
+	 * Number of times this page has been registered as a part
+	 * of a probe. If zero, page is disarmed and this may be freed.
+	 * Used only by writers (RCU).
+	 */
+	int count;
+};
+
+struct kmmio_delayed_release {
+	struct rcu_head rcu;
+	struct kmmio_fault_page *release_list;
+};
+
 struct kmmio_context {
 	struct kmmio_fault_page *fpage;
 	struct kmmio_probe *probe;
 	unsigned long saved_flags;
+	unsigned long addr;
 	int active;
 };
 
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address);
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 								void *args);
 
+static DECLARE_MUTEX(kmmio_init_mutex);
 static DEFINE_SPINLOCK(kmmio_lock);
 
 /* These are protected by kmmio_lock */
+static int kmmio_initialized;
 unsigned int kmmio_count;
-static unsigned int handler_registered;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
 /* Accessed per-cpu */
 static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
 
+/* protected by kmmio_init_mutex */
 static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-int init_kmmio(void)
+/**
+ * Makes sure kmmio is initialized and usable.
+ * This must be called before any other kmmio function defined here.
+ * May sleep.
+ */
+void reference_kmmio(void)
 {
-	int i;
-	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
-		INIT_LIST_HEAD(&kmmio_page_table[i]);
-
-	register_die_notifier(&nb_die);
-	return 0;
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+	if (!kmmio_initialized) {
+		int i;
+		for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+			INIT_LIST_HEAD(&kmmio_page_table[i]);
+		if (register_die_notifier(&nb_die))
+			BUG();
+	}
+	kmmio_initialized++;
+	spin_unlock_irq(&kmmio_lock);
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL_GPL(reference_kmmio);
 
-void cleanup_kmmio(void)
+/**
+ * Clean up kmmio after use. This must be called for every call to
+ * reference_kmmio(). All probes registered after the corresponding
+ * reference_kmmio() must have been unregistered when calling this.
+ * May sleep.
+ */
+void unreference_kmmio(void)
 {
-	/*
-	 * Assume the following have been already cleaned by calling
-	 * unregister_kmmio_probe() appropriately:
-	 * kmmio_page_table, kmmio_probes
-	 */
-	if (handler_registered) {
-		if (mmiotrace_unregister_pf(&kmmio_page_fault))
-			BUG();
-		synchronize_rcu();
+	bool unreg = false;
+
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+
+	if (kmmio_initialized == 1) {
+		BUG_ON(is_kmmio_active());
+		unreg = true;
 	}
-	unregister_die_notifier(&nb_die);
+	kmmio_initialized--;
+	BUG_ON(kmmio_initialized < 0);
+	spin_unlock_irq(&kmmio_lock);
+
+	if (unreg)
+		unregister_die_notifier(&nb_die); /* calls sync_rcu() */
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL(unreference_kmmio);
 
 /*
  * this is basically a dynamic stabbing problem:
@@ -90,33 +140,33 @@ void cleanup_kmmio(void)
  * Overlap a Point (might be simple)
  * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
  */
-/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
 static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 {
 	struct kmmio_probe *p;
-	list_for_each_entry(p, &kmmio_probes, list) {
+	list_for_each_entry_rcu(p, &kmmio_probes, list) {
 		if (addr >= p->addr && addr <= (p->addr + p->len))
 			return p;
 	}
 	return NULL;
 }
 
+/* You must be holding RCU read lock. */
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
-	struct list_head *head, *tmp;
+	struct list_head *head;
+	struct kmmio_fault_page *p;
 
 	page &= PAGE_MASK;
-	head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
-	list_for_each(tmp, head) {
-		struct kmmio_fault_page *p
-			= list_entry(tmp, struct kmmio_fault_page, list);
+	head = kmmio_page_list(page);
+	list_for_each_entry_rcu(p, head, list) {
 		if (p->page == page)
 			return p;
 	}
-
 	return NULL;
 }
 
+/** Mark the given page as not present. Access to it will trigger a fault. */
 static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-					__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+							__func__, page);
 		return;
 	}
 
@@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	__flush_tlb_one(page);
 }
 
+/** Mark the given page as present. */
 static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-					__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+							__func__, page);
 		return;
 	}
 
@@ -170,12 +221,24 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 }
 
 /*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
  * Interrupts are disabled on entry as trap3 is an interrupt gate
  * and they remain disabled thorough out this function.
  */
-static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 {
-	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+	struct kmmio_context *ctx;
+	struct kmmio_fault_page *faultpage;
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	 * XXX what if an interrupt occurs between returning from
 	 * do_page_fault() and entering the single-step exception handler?
 	 * And that interrupt triggers a kmmio trap?
+	 * XXX If we tracing an interrupt service routine or whatever, is
+	 * this enough to keep it on the current cpu?
 	 */
 	preempt_disable();
 
-	/* interrupts disabled and CPU-local data => atomicity guaranteed. */
+	rcu_read_lock();
+	faultpage = get_kmmio_fault_page(addr);
+	if (!faultpage) {
+		/*
+		 * Either this page fault is not caused by kmmio, or
+		 * another CPU just pulled the kmmio probe from under
+		 * our feet. In the latter case all hell breaks loose.
+		 */
+		goto no_kmmio;
+	}
+
+	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
 		/*
-		 * This avoids a deadlock with kmmio_lock.
+		 * Prevent overwriting already in-flight context.
 		 * If this page fault really was due to kmmio trap,
 		 * all hell breaks loose.
 		 */
-		printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
-				"for address %lu. Ignoring.\n",
+		pr_emerg("kmmio: recursive probe hit on CPU %d, "
+					"for address 0x%08lx. Ignoring.\n",
 			smp_processor_id(), addr);
-		goto no_kmmio;
+		goto no_kmmio_ctx;
 	}
 	ctx->active++;
 
-	/*
-	 * Acquire the kmmio lock to prevent changes affecting
-	 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
-	 * returned pointers.
-	 * The lock is released in post_kmmio_handler().
-	 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
-	 */
-	spin_lock(&kmmio_lock);
-
-	ctx->fpage = get_kmmio_fault_page(addr);
-	if (!ctx->fpage) {
-		/* this page fault is not caused by kmmio */
-		goto no_kmmio_locked;
-	}
-
+	ctx->fpage = faultpage;
 	ctx->probe = get_kmmio_probe(addr);
 	ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+	ctx->addr = addr;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	regs->flags |= TF_MASK;
 	regs->flags &= ~IF_MASK;
 
-	/* We hold lock, now we set present bit in PTE and single step. */
+	/* Now we set present bit in PTE and single step. */
 	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	put_cpu_var(kmmio_ctx);
+	rcu_read_unlock();
 	return 1;
 
-no_kmmio_locked:
-	spin_unlock(&kmmio_lock);
-	ctx->active--;
+no_kmmio_ctx:
+	put_cpu_var(kmmio_ctx);
 no_kmmio:
+	rcu_read_unlock();
 	preempt_enable_no_resched();
-	put_cpu_var(kmmio_ctx);
-	/* page fault not handled by kmmio */
-	return 0;
+	return 0; /* page fault not handled by kmmio */
 }
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate
  * and they remain disabled thorough out this function.
- * And we hold kmmio lock.
+ * This must always get called as the pair to kmmio_handler().
  */
 static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 {
 	int ret = 0;
+	struct kmmio_probe *probe;
+	struct kmmio_fault_page *faultpage;
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active)
 		goto out;
 
+	rcu_read_lock();
+
+	faultpage = get_kmmio_fault_page(ctx->addr);
+	probe = get_kmmio_probe(ctx->addr);
+	if (faultpage != ctx->fpage || probe != ctx->probe) {
+		/*
+		 * The trace setup changed after kmmio_handler() and before
+		 * running this respective post handler. User does not want
+		 * the result anymore.
+		 */
+		ctx->probe = NULL;
+		ctx->fpage = NULL;
+	}
+
 	if (ctx->probe && ctx->probe->post_handler)
 		ctx->probe->post_handler(ctx->probe, condition, regs);
 
-	arm_kmmio_fault_page(ctx->fpage->page, NULL);
+	if (ctx->fpage)
+		arm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	regs->flags &= ~TF_MASK;
 	regs->flags |= ctx->saved_flags;
 
 	/* These were acquired in kmmio_handler(). */
 	ctx->active--;
-	spin_unlock(&kmmio_lock);
+	BUG_ON(ctx->active);
 	preempt_enable_no_resched();
 
 	/*
@@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	if (!(regs->flags & TF_MASK))
 		ret = 1;
 
+	rcu_read_unlock();
 out:
 	put_cpu_var(kmmio_ctx);
 	return ret;
 }
 
+/* You must be holding kmmio_lock. */
 static int add_kmmio_fault_page(unsigned long page)
 {
 	struct kmmio_fault_page *f;
@@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page)
 	page &= PAGE_MASK;
 	f = get_kmmio_fault_page(page);
 	if (f) {
+		if (!f->count)
+			arm_kmmio_fault_page(f->page, NULL);
 		f->count++;
 		return 0;
 	}
@@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page)
 
 	f->count = 1;
 	f->page = page;
-	list_add(&f->list,
-		&kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
+	list_add_rcu(&f->list, kmmio_page_list(f->page));
 
 	arm_kmmio_fault_page(f->page, NULL);
 
 	return 0;
 }
 
-static void release_kmmio_fault_page(unsigned long page)
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
@@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page)
 		return;
 
 	f->count--;
+	BUG_ON(f->count < 0);
 	if (!f->count) {
 		disarm_kmmio_fault_page(f->page, NULL);
-		list_del(&f->list);
+		f->release_next = *release_list;
+		*release_list = f;
 	}
 }
 
@@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p)
 		ret = -EEXIST;
 		goto out;
 	}
-	list_add(&p->list, &kmmio_probes);
-	/*printk("adding fault pages...\n");*/
+	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < p->len) {
 		if (add_kmmio_fault_page(p->addr + size))
-			printk(KERN_ERR "mmio: Unable to set page fault.\n");
+			pr_err("kmmio: Unable to set page fault.\n");
 		size += PAGE_SIZE;
 	}
-
-	if (!handler_registered) {
-		if (mmiotrace_register_pf(&kmmio_page_fault))
-			printk(KERN_ERR "mmiotrace: Cannot register page "
-							"fault handler.\n");
-		else
-			handler_registered++;
-	}
-
 out:
 	spin_unlock_irq(&kmmio_lock);
 	/*
 	 * XXX: What should I do here?
 	 * Here was a call to global_flush_tlb(), but it does not exist
-	 * anymore.
+	 * anymore. It seems it's not needed after all.
 	 */
 	return ret;
 }
+EXPORT_SYMBOL(register_kmmio_probe);
 
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	while (p) {
+		struct kmmio_fault_page *next = p->release_next;
+		BUG_ON(p->count);
+		kfree(p);
+		p = next;
+	}
+	kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page **prevp = &dr->release_list;
+	unsigned long flags;
+	spin_lock_irqsave(&kmmio_lock, flags);
+	while (p) {
+		if (!p->count)
+			list_del_rcu(&p->list);
+		else
+			*prevp = p->release_next;
+		prevp = &p->release_next;
+		p = p->release_next;
+	}
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+	/* This is the real RCU destroy call. */
+	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actally free the kmmio_fault_page structs as with RCU.
+ */
 void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long size = 0;
+	struct kmmio_fault_page *release_list = NULL;
+	struct kmmio_delayed_release *drelease;
 
 	spin_lock_irq(&kmmio_lock);
 	while (size < p->len) {
-		release_kmmio_fault_page(p->addr + size);
+		release_kmmio_fault_page(p->addr + size, &release_list);
 		size += PAGE_SIZE;
 	}
-	list_del(&p->list);
+	list_del_rcu(&p->list);
 	kmmio_count--;
 	spin_unlock_irq(&kmmio_lock);
-}
 
-/*
- * According to 2.6.20, mainly x86_64 arch:
- * This is being called from do_page_fault(), via the page fault notifier
- * chain. The chain is called for both user space faults and kernel space
- * faults (address >= TASK_SIZE64), except not on faults serviced by
- * vmalloc_fault().
- *
- * We may be in an interrupt or a critical section. Also prefecthing may
- * trigger a page fault. We may be in the middle of process switch.
- * The page fault hook functionality has put us inside RCU read lock.
- *
- * Local interrupts are disabled, so preemption cannot happen.
- * Do not enable interrupts, do not sleep, and watch out for other CPUs.
- */
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address)
-{
-	if (is_kmmio_active())
-		if (kmmio_handler(regs, address) == 1)
-			return -1;
-	return 0;
+	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+	if (!drelease) {
+		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+		return;
+	}
+	drelease->release_list = release_list;
+
+	/*
+	 * This is not really RCU here. We have just disarmed a set of
+	 * pages so that they cannot trigger page faults anymore. However,
+	 * we cannot remove the pages from kmmio_page_table,
+	 * because a probe hit might be in flight on another CPU. The
+	 * pages are collected into a list, and they will be removed from
+	 * kmmio_page_table when it is certain that no probe hit related to
+	 * these pages can be in flight. RCU grace period sounds like a
+	 * good choice.
+	 *
+	 * If we removed the pages too early, kmmio page fault handler might
+	 * not find the respective kmmio_fault_page and determine it's not
+	 * a kmmio fault, when it actually is. This would lead to madness.
+	 */
+	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
 }
+EXPORT_SYMBOL(unregister_kmmio_probe);
 
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 								void *args)