diff options
Diffstat (limited to 'arch/x86/mm/kmmio.c')
-rw-r--r-- | arch/x86/mm/kmmio.c | 164 |
1 files changed, 104 insertions, 60 deletions
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4b..6a518dd08a36 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -32,11 +32,14 @@ struct kmmio_fault_page { | |||
32 | struct list_head list; | 32 | struct list_head list; |
33 | struct kmmio_fault_page *release_next; | 33 | struct kmmio_fault_page *release_next; |
34 | unsigned long page; /* location of the fault page */ | 34 | unsigned long page; /* location of the fault page */ |
35 | bool old_presence; /* page presence prior to arming */ | ||
36 | bool armed; | ||
35 | 37 | ||
36 | /* | 38 | /* |
37 | * Number of times this page has been registered as a part | 39 | * Number of times this page has been registered as a part |
38 | * of a probe. If zero, page is disarmed and this may be freed. | 40 | * of a probe. If zero, page is disarmed and this may be freed. |
39 | * Used only by writers (RCU). | 41 | * Used only by writers (RCU) and post_kmmio_handler(). |
42 | * Protected by kmmio_lock, when linked into kmmio_page_table. | ||
40 | */ | 43 | */ |
41 | int count; | 44 | int count; |
42 | }; | 45 | }; |
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | |||
105 | return NULL; | 108 | return NULL; |
106 | } | 109 | } |
107 | 110 | ||
108 | static void set_page_present(unsigned long addr, bool present, | 111 | static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) |
109 | unsigned int *pglevel) | 112 | { |
113 | pmdval_t v = pmd_val(*pmd); | ||
114 | *old = !!(v & _PAGE_PRESENT); | ||
115 | v &= ~_PAGE_PRESENT; | ||
116 | if (present) | ||
117 | v |= _PAGE_PRESENT; | ||
118 | set_pmd(pmd, __pmd(v)); | ||
119 | } | ||
120 | |||
121 | static void set_pte_presence(pte_t *pte, bool present, bool *old) | ||
122 | { | ||
123 | pteval_t v = pte_val(*pte); | ||
124 | *old = !!(v & _PAGE_PRESENT); | ||
125 | v &= ~_PAGE_PRESENT; | ||
126 | if (present) | ||
127 | v |= _PAGE_PRESENT; | ||
128 | set_pte_atomic(pte, __pte(v)); | ||
129 | } | ||
130 | |||
131 | static int set_page_presence(unsigned long addr, bool present, bool *old) | ||
110 | { | 132 | { |
111 | pteval_t pteval; | ||
112 | pmdval_t pmdval; | ||
113 | unsigned int level; | 133 | unsigned int level; |
114 | pmd_t *pmd; | ||
115 | pte_t *pte = lookup_address(addr, &level); | 134 | pte_t *pte = lookup_address(addr, &level); |
116 | 135 | ||
117 | if (!pte) { | 136 | if (!pte) { |
118 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | 137 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); |
119 | return; | 138 | return -1; |
120 | } | 139 | } |
121 | 140 | ||
122 | if (pglevel) | ||
123 | *pglevel = level; | ||
124 | |||
125 | switch (level) { | 141 | switch (level) { |
126 | case PG_LEVEL_2M: | 142 | case PG_LEVEL_2M: |
127 | pmd = (pmd_t *)pte; | 143 | set_pmd_presence((pmd_t *)pte, present, old); |
128 | pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; | ||
129 | if (present) | ||
130 | pmdval |= _PAGE_PRESENT; | ||
131 | set_pmd(pmd, __pmd(pmdval)); | ||
132 | break; | 144 | break; |
133 | |||
134 | case PG_LEVEL_4K: | 145 | case PG_LEVEL_4K: |
135 | pteval = pte_val(*pte) & ~_PAGE_PRESENT; | 146 | set_pte_presence(pte, present, old); |
136 | if (present) | ||
137 | pteval |= _PAGE_PRESENT; | ||
138 | set_pte_atomic(pte, __pte(pteval)); | ||
139 | break; | 147 | break; |
140 | |||
141 | default: | 148 | default: |
142 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 149 | pr_err("kmmio: unexpected page level 0x%x.\n", level); |
143 | return; | 150 | return -1; |
144 | } | 151 | } |
145 | 152 | ||
146 | __flush_tlb_one(addr); | 153 | __flush_tlb_one(addr); |
154 | return 0; | ||
147 | } | 155 | } |
148 | 156 | ||
149 | /** Mark the given page as not present. Access to it will trigger a fault. */ | 157 | /* |
150 | static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 158 | * Mark the given page as not present. Access to it will trigger a fault. |
159 | * | ||
160 | * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the | ||
161 | * protection is ignored here. RCU read lock is assumed held, so the struct | ||
162 | * will not disappear unexpectedly. Furthermore, the caller must guarantee, | ||
163 | * that double arming the same virtual address (page) cannot occur. | ||
164 | * | ||
165 | * Double disarming on the other hand is allowed, and may occur when a fault | ||
166 | * and mmiotrace shutdown happen simultaneously. | ||
167 | */ | ||
168 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | ||
151 | { | 169 | { |
152 | set_page_present(page & PAGE_MASK, false, pglevel); | 170 | int ret; |
171 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | ||
172 | if (f->armed) { | ||
173 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | ||
174 | f->page, f->count, f->old_presence); | ||
175 | } | ||
176 | ret = set_page_presence(f->page, false, &f->old_presence); | ||
177 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | ||
178 | f->armed = true; | ||
179 | return ret; | ||
153 | } | 180 | } |
154 | 181 | ||
155 | /** Mark the given page as present. */ | 182 | /** Restore the given page to saved presence state. */ |
156 | static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 183 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) |
157 | { | 184 | { |
158 | set_page_present(page & PAGE_MASK, true, pglevel); | 185 | bool tmp; |
186 | int ret = set_page_presence(f->page, f->old_presence, &tmp); | ||
187 | WARN_ONCE(ret < 0, | ||
188 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | ||
189 | f->armed = false; | ||
159 | } | 190 | } |
160 | 191 | ||
161 | /* | 192 | /* |
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
202 | 233 | ||
203 | ctx = &get_cpu_var(kmmio_ctx); | 234 | ctx = &get_cpu_var(kmmio_ctx); |
204 | if (ctx->active) { | 235 | if (ctx->active) { |
205 | disarm_kmmio_fault_page(faultpage->page, NULL); | ||
206 | if (addr == ctx->addr) { | 236 | if (addr == ctx->addr) { |
207 | /* | 237 | /* |
208 | * On SMP we sometimes get recursive probe hits on the | 238 | * A second fault on the same page means some other |
209 | * same address. Context is already saved, fall out. | 239 | * condition needs handling by do_page_fault(), the |
240 | * page really not being present is the most common. | ||
210 | */ | 241 | */ |
211 | pr_debug("kmmio: duplicate probe hit on CPU %d, for " | 242 | pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", |
212 | "address 0x%08lx.\n", | 243 | addr, smp_processor_id()); |
213 | smp_processor_id(), addr); | 244 | |
214 | ret = 1; | 245 | if (!faultpage->old_presence) |
215 | goto no_kmmio_ctx; | 246 | pr_info("kmmio: unexpected secondary hit for " |
216 | } | 247 | "address 0x%08lx on CPU %d.\n", addr, |
217 | /* | 248 | smp_processor_id()); |
218 | * Prevent overwriting already in-flight context. | 249 | } else { |
219 | * This should not happen, let's hope disarming at least | 250 | /* |
220 | * prevents a panic. | 251 | * Prevent overwriting already in-flight context. |
221 | */ | 252 | * This should not happen, let's hope disarming at |
222 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | 253 | * least prevents a panic. |
254 | */ | ||
255 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | ||
223 | "for address 0x%08lx. Ignoring.\n", | 256 | "for address 0x%08lx. Ignoring.\n", |
224 | smp_processor_id(), addr); | 257 | smp_processor_id(), addr); |
225 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | 258 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", |
226 | ctx->addr); | 259 | ctx->addr); |
260 | disarm_kmmio_fault_page(faultpage); | ||
261 | } | ||
227 | goto no_kmmio_ctx; | 262 | goto no_kmmio_ctx; |
228 | } | 263 | } |
229 | ctx->active++; | 264 | ctx->active++; |
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
244 | regs->flags &= ~X86_EFLAGS_IF; | 279 | regs->flags &= ~X86_EFLAGS_IF; |
245 | 280 | ||
246 | /* Now we set present bit in PTE and single step. */ | 281 | /* Now we set present bit in PTE and single step. */ |
247 | disarm_kmmio_fault_page(ctx->fpage->page, NULL); | 282 | disarm_kmmio_fault_page(ctx->fpage); |
248 | 283 | ||
249 | /* | 284 | /* |
250 | * If another cpu accesses the same page while we are stepping, | 285 | * If another cpu accesses the same page while we are stepping, |
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
275 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
276 | 311 | ||
277 | if (!ctx->active) { | 312 | if (!ctx->active) { |
278 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | 313 | pr_warning("kmmio: spurious debug trap on CPU %d.\n", |
279 | smp_processor_id()); | 314 | smp_processor_id()); |
280 | goto out; | 315 | goto out; |
281 | } | 316 | } |
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
283 | if (ctx->probe && ctx->probe->post_handler) | 318 | if (ctx->probe && ctx->probe->post_handler) |
284 | ctx->probe->post_handler(ctx->probe, condition, regs); | 319 | ctx->probe->post_handler(ctx->probe, condition, regs); |
285 | 320 | ||
286 | arm_kmmio_fault_page(ctx->fpage->page, NULL); | 321 | /* Prevent racing against release_kmmio_fault_page(). */ |
322 | spin_lock(&kmmio_lock); | ||
323 | if (ctx->fpage->count) | ||
324 | arm_kmmio_fault_page(ctx->fpage); | ||
325 | spin_unlock(&kmmio_lock); | ||
287 | 326 | ||
288 | regs->flags &= ~X86_EFLAGS_TF; | 327 | regs->flags &= ~X86_EFLAGS_TF; |
289 | regs->flags |= ctx->saved_flags; | 328 | regs->flags |= ctx->saved_flags; |
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page) | |||
315 | f = get_kmmio_fault_page(page); | 354 | f = get_kmmio_fault_page(page); |
316 | if (f) { | 355 | if (f) { |
317 | if (!f->count) | 356 | if (!f->count) |
318 | arm_kmmio_fault_page(f->page, NULL); | 357 | arm_kmmio_fault_page(f); |
319 | f->count++; | 358 | f->count++; |
320 | return 0; | 359 | return 0; |
321 | } | 360 | } |
322 | 361 | ||
323 | f = kmalloc(sizeof(*f), GFP_ATOMIC); | 362 | f = kzalloc(sizeof(*f), GFP_ATOMIC); |
324 | if (!f) | 363 | if (!f) |
325 | return -1; | 364 | return -1; |
326 | 365 | ||
327 | f->count = 1; | 366 | f->count = 1; |
328 | f->page = page; | 367 | f->page = page; |
329 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
330 | 368 | ||
331 | arm_kmmio_fault_page(f->page, NULL); | 369 | if (arm_kmmio_fault_page(f)) { |
370 | kfree(f); | ||
371 | return -1; | ||
372 | } | ||
373 | |||
374 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
332 | 375 | ||
333 | return 0; | 376 | return 0; |
334 | } | 377 | } |
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page, | |||
347 | f->count--; | 390 | f->count--; |
348 | BUG_ON(f->count < 0); | 391 | BUG_ON(f->count < 0); |
349 | if (!f->count) { | 392 | if (!f->count) { |
350 | disarm_kmmio_fault_page(f->page, NULL); | 393 | disarm_kmmio_fault_page(f); |
351 | f->release_next = *release_list; | 394 | f->release_next = *release_list; |
352 | *release_list = f; | 395 | *release_list = f; |
353 | } | 396 | } |
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) | |||
408 | 451 | ||
409 | static void remove_kmmio_fault_pages(struct rcu_head *head) | 452 | static void remove_kmmio_fault_pages(struct rcu_head *head) |
410 | { | 453 | { |
411 | struct kmmio_delayed_release *dr = container_of( | 454 | struct kmmio_delayed_release *dr = |
412 | head, | 455 | container_of(head, struct kmmio_delayed_release, rcu); |
413 | struct kmmio_delayed_release, | ||
414 | rcu); | ||
415 | struct kmmio_fault_page *p = dr->release_list; | 456 | struct kmmio_fault_page *p = dr->release_list; |
416 | struct kmmio_fault_page **prevp = &dr->release_list; | 457 | struct kmmio_fault_page **prevp = &dr->release_list; |
417 | unsigned long flags; | 458 | unsigned long flags; |
459 | |||
418 | spin_lock_irqsave(&kmmio_lock, flags); | 460 | spin_lock_irqsave(&kmmio_lock, flags); |
419 | while (p) { | 461 | while (p) { |
420 | if (!p->count) | 462 | if (!p->count) { |
421 | list_del_rcu(&p->list); | 463 | list_del_rcu(&p->list); |
422 | else | 464 | prevp = &p->release_next; |
465 | } else { | ||
423 | *prevp = p->release_next; | 466 | *prevp = p->release_next; |
424 | prevp = &p->release_next; | 467 | } |
425 | p = p->release_next; | 468 | p = p->release_next; |
426 | } | 469 | } |
427 | spin_unlock_irqrestore(&kmmio_lock, flags); | 470 | spin_unlock_irqrestore(&kmmio_lock, flags); |
471 | |||
428 | /* This is the real RCU destroy call. */ | 472 | /* This is the real RCU destroy call. */ |
429 | call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); | 473 | call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); |
430 | } | 474 | } |