Diffstat (limited to 'arch/x86/kernel/mmiotrace/kmmio.c')
-rw-r--r--	arch/x86/kernel/mmiotrace/kmmio.c	349
1 files changed, 240 insertions(+), 109 deletions(-)
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 5e239d0b846..539a9b19588 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/version.h>
+#include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
 #include <linux/init.h>
@@ -17,70 +18,119 @@
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include "kmmio.h"
+#include <linux/mmiotrace.h>
 
-#define KMMIO_HASH_BITS 6
-#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
 #define KMMIO_PAGE_HASH_BITS 4
 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
 
+struct kmmio_fault_page {
+	struct list_head list;
+	struct kmmio_fault_page *release_next;
+	unsigned long page; /* location of the fault page */
+
+	/*
+	 * Number of times this page has been registered as a part
+	 * of a probe. If zero, page is disarmed and this may be freed.
+	 * Used only by writers (RCU).
+	 */
+	int count;
+};
+
+struct kmmio_delayed_release {
+	struct rcu_head rcu;
+	struct kmmio_fault_page *release_list;
+};
+
 struct kmmio_context {
 	struct kmmio_fault_page *fpage;
 	struct kmmio_probe *probe;
 	unsigned long saved_flags;
+	unsigned long addr;
 	int active;
 };
 
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address);
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 								void *args);
 
+static DECLARE_MUTEX(kmmio_init_mutex);
 static DEFINE_SPINLOCK(kmmio_lock);
 
 /* These are protected by kmmio_lock */
+static int kmmio_initialized;
 unsigned int kmmio_count;
-static unsigned int handler_registered;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
 /* Accessed per-cpu */
 static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
 
+/* protected by kmmio_init_mutex */
 static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-int init_kmmio(void)
+/**
+ * Makes sure kmmio is initialized and usable.
+ * This must be called before any other kmmio function defined here.
+ * May sleep.
+ */
+void reference_kmmio(void)
 {
-	int i;
-	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
-		INIT_LIST_HEAD(&kmmio_page_table[i]);
-
-	register_die_notifier(&nb_die);
-	return 0;
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+	if (!kmmio_initialized) {
+		int i;
+		for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+			INIT_LIST_HEAD(&kmmio_page_table[i]);
+		if (register_die_notifier(&nb_die))
+			BUG();
+	}
+	kmmio_initialized++;
+	spin_unlock_irq(&kmmio_lock);
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL_GPL(reference_kmmio);
 
-void cleanup_kmmio(void)
+/**
+ * Clean up kmmio after use. This must be called for every call to
+ * reference_kmmio(). All probes registered after the corresponding
+ * reference_kmmio() must have been unregistered when calling this.
+ * May sleep.
+ */
+void unreference_kmmio(void)
 {
-	/*
-	 * Assume the following have been already cleaned by calling
-	 * unregister_kmmio_probe() appropriately:
-	 * kmmio_page_table, kmmio_probes
-	 */
-	if (handler_registered) {
-		if (mmiotrace_unregister_pf(&kmmio_page_fault))
-			BUG();
-		synchronize_rcu();
+	bool unreg = false;
+
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+
+	if (kmmio_initialized == 1) {
+		BUG_ON(is_kmmio_active());
+		unreg = true;
 	}
-	unregister_die_notifier(&nb_die);
+	kmmio_initialized--;
+	BUG_ON(kmmio_initialized < 0);
+	spin_unlock_irq(&kmmio_lock);
+
+	if (unreg)
+		unregister_die_notifier(&nb_die); /* calls sync_rcu() */
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL(unreference_kmmio);
 
 /*
  * this is basically a dynamic stabbing problem:
@@ -90,33 +140,33 @@ void cleanup_kmmio(void)
  * Overlap a Point (might be simple)
  * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
  */
-/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
 static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 {
 	struct kmmio_probe *p;
-	list_for_each_entry(p, &kmmio_probes, list) {
+	list_for_each_entry_rcu(p, &kmmio_probes, list) {
 		if (addr >= p->addr && addr <= (p->addr + p->len))
 			return p;
 	}
 	return NULL;
 }
 
+/* You must be holding RCU read lock. */
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
-	struct list_head *head, *tmp;
+	struct list_head *head;
+	struct kmmio_fault_page *p;
 
 	page &= PAGE_MASK;
-	head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
-	list_for_each(tmp, head) {
-		struct kmmio_fault_page *p
-			= list_entry(tmp, struct kmmio_fault_page, list);
+	head = kmmio_page_list(page);
+	list_for_each_entry_rcu(p, head, list) {
 		if (p->page == page)
 			return p;
 	}
-
 	return NULL;
 }
 
+/** Mark the given page as not present. Access to it will trigger a fault. */
 static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-						__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+						__func__, page);
 		return;
 	}
 
@@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	__flush_tlb_one(page);
 }
 
+/** Mark the given page as present. */
 static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-						__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+						__func__, page);
 		return;
 	}
 
@@ -170,12 +221,24 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 }
 
 /*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
  * Interrupts are disabled on entry as trap3 is an interrupt gate
  * and they remain disabled thorough out this function.
  */
-static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 {
-	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+	struct kmmio_context *ctx;
+	struct kmmio_fault_page *faultpage;
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	 * XXX what if an interrupt occurs between returning from
 	 * do_page_fault() and entering the single-step exception handler?
 	 * And that interrupt triggers a kmmio trap?
+	 * XXX If we tracing an interrupt service routine or whatever, is
+	 * this enough to keep it on the current cpu?
 	 */
 	preempt_disable();
 
-	/* interrupts disabled and CPU-local data => atomicity guaranteed. */
+	rcu_read_lock();
+	faultpage = get_kmmio_fault_page(addr);
+	if (!faultpage) {
+		/*
+		 * Either this page fault is not caused by kmmio, or
+		 * another CPU just pulled the kmmio probe from under
+		 * our feet. In the latter case all hell breaks loose.
+		 */
+		goto no_kmmio;
+	}
+
+	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
 		/*
-		 * This avoids a deadlock with kmmio_lock.
+		 * Prevent overwriting already in-flight context.
 		 * If this page fault really was due to kmmio trap,
 		 * all hell breaks loose.
 		 */
-		printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
-					"for address %lu. Ignoring.\n",
+		pr_emerg("kmmio: recursive probe hit on CPU %d, "
+					"for address 0x%08lx. Ignoring.\n",
 			smp_processor_id(), addr);
-		goto no_kmmio;
+		goto no_kmmio_ctx;
 	}
 	ctx->active++;
 
-	/*
-	 * Acquire the kmmio lock to prevent changes affecting
-	 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
-	 * returned pointers.
-	 * The lock is released in post_kmmio_handler().
-	 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
-	 */
-	spin_lock(&kmmio_lock);
-
-	ctx->fpage = get_kmmio_fault_page(addr);
-	if (!ctx->fpage) {
-		/* this page fault is not caused by kmmio */
-		goto no_kmmio_locked;
-	}
-
+	ctx->fpage = faultpage;
 	ctx->probe = get_kmmio_probe(addr);
 	ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+	ctx->addr = addr;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	regs->flags |= TF_MASK;
 	regs->flags &= ~IF_MASK;
 
-	/* We hold lock, now we set present bit in PTE and single step. */
+	/* Now we set present bit in PTE and single step. */
 	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	put_cpu_var(kmmio_ctx);
+	rcu_read_unlock();
 	return 1;
 
-no_kmmio_locked:
-	spin_unlock(&kmmio_lock);
-	ctx->active--;
+no_kmmio_ctx:
+	put_cpu_var(kmmio_ctx);
 no_kmmio:
+	rcu_read_unlock();
 	preempt_enable_no_resched();
-	put_cpu_var(kmmio_ctx);
-	/* page fault not handled by kmmio */
-	return 0;
+	return 0; /* page fault not handled by kmmio */
 }
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate
  * and they remain disabled thorough out this function.
- * And we hold kmmio lock.
+ * This must always get called as the pair to kmmio_handler().
  */
 static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 {
 	int ret = 0;
+	struct kmmio_probe *probe;
+	struct kmmio_fault_page *faultpage;
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active)
 		goto out;
 
+	rcu_read_lock();
+
+	faultpage = get_kmmio_fault_page(ctx->addr);
+	probe = get_kmmio_probe(ctx->addr);
+	if (faultpage != ctx->fpage || probe != ctx->probe) {
+		/*
+		 * The trace setup changed after kmmio_handler() and before
+		 * running this respective post handler. User does not want
+		 * the result anymore.
+		 */
+		ctx->probe = NULL;
+		ctx->fpage = NULL;
+	}
+
 	if (ctx->probe && ctx->probe->post_handler)
 		ctx->probe->post_handler(ctx->probe, condition, regs);
 
-	arm_kmmio_fault_page(ctx->fpage->page, NULL);
+	if (ctx->fpage)
+		arm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	regs->flags &= ~TF_MASK;
 	regs->flags |= ctx->saved_flags;
 
 	/* These were acquired in kmmio_handler(). */
 	ctx->active--;
-	spin_unlock(&kmmio_lock);
+	BUG_ON(ctx->active);
 	preempt_enable_no_resched();
 
 	/*
@@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	if (!(regs->flags & TF_MASK))
 		ret = 1;
 
+	rcu_read_unlock();
 out:
 	put_cpu_var(kmmio_ctx);
 	return ret;
 }
 
+/* You must be holding kmmio_lock. */
 static int add_kmmio_fault_page(unsigned long page)
 {
 	struct kmmio_fault_page *f;
@@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page)
 	page &= PAGE_MASK;
 	f = get_kmmio_fault_page(page);
 	if (f) {
+		if (!f->count)
+			arm_kmmio_fault_page(f->page, NULL);
 		f->count++;
 		return 0;
 	}
@@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page)
 
 	f->count = 1;
 	f->page = page;
-	list_add(&f->list,
-		&kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
+	list_add_rcu(&f->list, kmmio_page_list(f->page));
 
 	arm_kmmio_fault_page(f->page, NULL);
 
 	return 0;
 }
 
-static void release_kmmio_fault_page(unsigned long page)
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
@@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page)
 		return;
 
 	f->count--;
+	BUG_ON(f->count < 0);
 	if (!f->count) {
 		disarm_kmmio_fault_page(f->page, NULL);
-		list_del(&f->list);
+		f->release_next = *release_list;
+		*release_list = f;
 	}
 }
 
@@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p)
 		ret = -EEXIST;
 		goto out;
 	}
-	list_add(&p->list, &kmmio_probes);
-	/*printk("adding fault pages...\n");*/
+	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < p->len) {
 		if (add_kmmio_fault_page(p->addr + size))
-			printk(KERN_ERR "mmio: Unable to set page fault.\n");
+			pr_err("kmmio: Unable to set page fault.\n");
 		size += PAGE_SIZE;
 	}
-
-	if (!handler_registered) {
-		if (mmiotrace_register_pf(&kmmio_page_fault))
-			printk(KERN_ERR "mmiotrace: Cannot register page "
-						"fault handler.\n");
-		else
-			handler_registered++;
-	}
-
 out:
 	spin_unlock_irq(&kmmio_lock);
 	/*
 	 * XXX: What should I do here?
 	 * Here was a call to global_flush_tlb(), but it does not exist
-	 * anymore.
+	 * anymore. It seems it's not needed after all.
 	 */
 	return ret;
 }
+EXPORT_SYMBOL(register_kmmio_probe);
 
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	while (p) {
+		struct kmmio_fault_page *next = p->release_next;
+		BUG_ON(p->count);
+		kfree(p);
+		p = next;
+	}
+	kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page **prevp = &dr->release_list;
+	unsigned long flags;
+	spin_lock_irqsave(&kmmio_lock, flags);
+	while (p) {
+		if (!p->count)
+			list_del_rcu(&p->list);
+		else
+			*prevp = p->release_next;
+		prevp = &p->release_next;
+		p = p->release_next;
+	}
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+	/* This is the real RCU destroy call. */
+	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actally free the kmmio_fault_page structs as with RCU.
+ */
 void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long size = 0;
+	struct kmmio_fault_page *release_list = NULL;
+	struct kmmio_delayed_release *drelease;
 
 	spin_lock_irq(&kmmio_lock);
 	while (size < p->len) {
-		release_kmmio_fault_page(p->addr + size);
+		release_kmmio_fault_page(p->addr + size, &release_list);
 		size += PAGE_SIZE;
 	}
-	list_del(&p->list);
+	list_del_rcu(&p->list);
 	kmmio_count--;
 	spin_unlock_irq(&kmmio_lock);
-}
 
-/*
- * According to 2.6.20, mainly x86_64 arch:
- * This is being called from do_page_fault(), via the page fault notifier
- * chain. The chain is called for both user space faults and kernel space
- * faults (address >= TASK_SIZE64), except not on faults serviced by
- * vmalloc_fault().
- *
- * We may be in an interrupt or a critical section. Also prefecthing may
- * trigger a page fault. We may be in the middle of process switch.
- * The page fault hook functionality has put us inside RCU read lock.
- *
- * Local interrupts are disabled, so preemption cannot happen.
- * Do not enable interrupts, do not sleep, and watch out for other CPUs.
- */
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address)
-{
-	if (is_kmmio_active())
-		if (kmmio_handler(regs, address) == 1)
-			return -1;
-	return 0;
+	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+	if (!drelease) {
+		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+		return;
+	}
+	drelease->release_list = release_list;
+
+	/*
+	 * This is not really RCU here. We have just disarmed a set of
+	 * pages so that they cannot trigger page faults anymore. However,
+	 * we cannot remove the pages from kmmio_page_table,
+	 * because a probe hit might be in flight on another CPU. The
+	 * pages are collected into a list, and they will be removed from
+	 * kmmio_page_table when it is certain that no probe hit related to
+	 * these pages can be in flight. RCU grace period sounds like a
+	 * good choice.
+	 *
+	 * If we removed the pages too early, kmmio page fault handler might
+	 * not find the respective kmmio_fault_page and determine it's not
+	 * a kmmio fault, when it actually is. This would lead to madness.
+	 */
+	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
 }
+EXPORT_SYMBOL(unregister_kmmio_probe);
 
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 		void *args)