diff options
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 73 | ||||
-rw-r--r-- | include/asm-x86_64/pgalloc.h | 28 | ||||
-rw-r--r-- | include/asm-x86_64/pgtable.h | 4 |
5 files changed, 95 insertions, 13 deletions
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 66c009e10bac..d9e4067faf05 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -534,6 +534,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) | |||
534 | 534 | ||
535 | void set_nmi_callback(nmi_callback_t callback) | 535 | void set_nmi_callback(nmi_callback_t callback) |
536 | { | 536 | { |
537 | vmalloc_sync_all(); | ||
537 | rcu_assign_pointer(nmi_callback, callback); | 538 | rcu_assign_pointer(nmi_callback, callback); |
538 | } | 539 | } |
539 | 540 | ||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28d50dc540e8..b25bc904d42d 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -78,6 +78,8 @@ int register_die_notifier(struct notifier_block *nb) | |||
78 | { | 78 | { |
79 | int err = 0; | 79 | int err = 0; |
80 | unsigned long flags; | 80 | unsigned long flags; |
81 | |||
82 | vmalloc_sync_all(); | ||
81 | spin_lock_irqsave(&die_notifier_lock, flags); | 83 | spin_lock_irqsave(&die_notifier_lock, flags); |
82 | err = notifier_chain_register(&die_chain, nb); | 84 | err = notifier_chain_register(&die_chain, nb); |
83 | spin_unlock_irqrestore(&die_notifier_lock, flags); | 85 | spin_unlock_irqrestore(&die_notifier_lock, flags); |
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 2e7c3c8ffe03..de91e17daf6f 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address) | |||
264 | return -1; | 264 | return -1; |
265 | if (pgd_none(*pgd)) | 265 | if (pgd_none(*pgd)) |
266 | set_pgd(pgd, *pgd_ref); | 266 | set_pgd(pgd, *pgd_ref); |
267 | else | ||
268 | BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); | ||
267 | 269 | ||
268 | /* Below here mismatches are bugs because these lower tables | 270 | /* Below here mismatches are bugs because these lower tables |
269 | are shared */ | 271 | are shared */ |
@@ -314,16 +316,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
314 | 316 | ||
315 | /* get the address */ | 317 | /* get the address */ |
316 | __asm__("movq %%cr2,%0":"=r" (address)); | 318 | __asm__("movq %%cr2,%0":"=r" (address)); |
317 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
318 | SIGSEGV) == NOTIFY_STOP) | ||
319 | return; | ||
320 | |||
321 | if (likely(regs->eflags & X86_EFLAGS_IF)) | ||
322 | local_irq_enable(); | ||
323 | |||
324 | if (unlikely(page_fault_trace)) | ||
325 | printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", | ||
326 | regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); | ||
327 | 319 | ||
328 | tsk = current; | 320 | tsk = current; |
329 | mm = tsk->mm; | 321 | mm = tsk->mm; |
@@ -351,10 +343,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
351 | */ | 343 | */ |
352 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 344 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && |
353 | ((address >= VMALLOC_START && address < VMALLOC_END))) { | 345 | ((address >= VMALLOC_START && address < VMALLOC_END))) { |
354 | if (vmalloc_fault(address) < 0) | 346 | if (vmalloc_fault(address) >= 0) |
355 | goto bad_area_nosemaphore; | 347 | return; |
356 | return; | ||
357 | } | 348 | } |
349 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
350 | SIGSEGV) == NOTIFY_STOP) | ||
351 | return; | ||
358 | /* | 352 | /* |
359 | * Don't take the mm semaphore here. If we fixup a prefetch | 353 | * Don't take the mm semaphore here. If we fixup a prefetch |
360 | * fault we could otherwise deadlock. | 354 | * fault we could otherwise deadlock. |
@@ -362,6 +356,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
362 | goto bad_area_nosemaphore; | 356 | goto bad_area_nosemaphore; |
363 | } | 357 | } |
364 | 358 | ||
359 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
360 | SIGSEGV) == NOTIFY_STOP) | ||
361 | return; | ||
362 | |||
363 | if (likely(regs->eflags & X86_EFLAGS_IF)) | ||
364 | local_irq_enable(); | ||
365 | |||
366 | if (unlikely(page_fault_trace)) | ||
367 | printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", | ||
368 | regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); | ||
369 | |||
365 | if (unlikely(error_code & PF_RSVD)) | 370 | if (unlikely(error_code & PF_RSVD)) |
366 | pgtable_bad(address, regs, error_code); | 371 | pgtable_bad(address, regs, error_code); |
367 | 372 | ||
@@ -571,6 +576,48 @@ do_sigbus: | |||
571 | return; | 576 | return; |
572 | } | 577 | } |
573 | 578 | ||
579 | DEFINE_SPINLOCK(pgd_lock); | ||
580 | struct page *pgd_list; | ||
581 | |||
582 | void vmalloc_sync_all(void) | ||
583 | { | ||
584 | /* Note that races in the updates of insync and start aren't | ||
585 | problematic: | ||
586 | insync can only get set bits added, and updates to start are only | ||
587 | improving performance (without affecting correctness if undone). */ | ||
588 | static DECLARE_BITMAP(insync, PTRS_PER_PGD); | ||
589 | static unsigned long start = VMALLOC_START & PGDIR_MASK; | ||
590 | unsigned long address; | ||
591 | |||
592 | for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { | ||
593 | if (!test_bit(pgd_index(address), insync)) { | ||
594 | const pgd_t *pgd_ref = pgd_offset_k(address); | ||
595 | struct page *page; | ||
596 | |||
597 | if (pgd_none(*pgd_ref)) | ||
598 | continue; | ||
599 | spin_lock(&pgd_lock); | ||
600 | for (page = pgd_list; page; | ||
601 | page = (struct page *)page->index) { | ||
602 | pgd_t *pgd; | ||
603 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | ||
604 | if (pgd_none(*pgd)) | ||
605 | set_pgd(pgd, *pgd_ref); | ||
606 | else | ||
607 | BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); | ||
608 | } | ||
609 | spin_unlock(&pgd_lock); | ||
610 | set_bit(pgd_index(address), insync); | ||
611 | } | ||
612 | if (address == start) | ||
613 | start = address + PGDIR_SIZE; | ||
614 | } | ||
615 | /* Check that there is no need to do the same for the modules area. */ | ||
616 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | ||
617 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | ||
618 | (__START_KERNEL & PGDIR_MASK))); | ||
619 | } | ||
620 | |||
574 | static int __init enable_pagefaulttrace(char *str) | 621 | static int __init enable_pagefaulttrace(char *str) |
575 | { | 622 | { |
576 | page_fault_trace = 1; | 623 | page_fault_trace = 1; |
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 08cad2482bcb..43d4c333a8b1 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h | |||
@@ -45,12 +45,39 @@ static inline void pud_free (pud_t *pud) | |||
45 | free_page((unsigned long)pud); | 45 | free_page((unsigned long)pud); |
46 | } | 46 | } |
47 | 47 | ||
48 | static inline void pgd_list_add(pgd_t *pgd) | ||
49 | { | ||
50 | struct page *page = virt_to_page(pgd); | ||
51 | |||
52 | spin_lock(&pgd_lock); | ||
53 | page->index = (pgoff_t)pgd_list; | ||
54 | if (pgd_list) | ||
55 | pgd_list->private = (unsigned long)&page->index; | ||
56 | pgd_list = page; | ||
57 | page->private = (unsigned long)&pgd_list; | ||
58 | spin_unlock(&pgd_lock); | ||
59 | } | ||
60 | |||
61 | static inline void pgd_list_del(pgd_t *pgd) | ||
62 | { | ||
63 | struct page *next, **pprev, *page = virt_to_page(pgd); | ||
64 | |||
65 | spin_lock(&pgd_lock); | ||
66 | next = (struct page *)page->index; | ||
67 | pprev = (struct page **)page->private; | ||
68 | *pprev = next; | ||
69 | if (next) | ||
70 | next->private = (unsigned long)pprev; | ||
71 | spin_unlock(&pgd_lock); | ||
72 | } | ||
73 | |||
48 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) | 74 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) |
49 | { | 75 | { |
50 | unsigned boundary; | 76 | unsigned boundary; |
51 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); | 77 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); |
52 | if (!pgd) | 78 | if (!pgd) |
53 | return NULL; | 79 | return NULL; |
80 | pgd_list_add(pgd); | ||
54 | /* | 81 | /* |
55 | * Copy kernel pointers in from init. | 82 | * Copy kernel pointers in from init. |
56 | * Could keep a freelist or slab cache of those because the kernel | 83 | * Could keep a freelist or slab cache of those because the kernel |
@@ -67,6 +94,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) | |||
67 | static inline void pgd_free(pgd_t *pgd) | 94 | static inline void pgd_free(pgd_t *pgd) |
68 | { | 95 | { |
69 | BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); | 96 | BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); |
97 | pgd_list_del(pgd); | ||
70 | free_page((unsigned long)pgd); | 98 | free_page((unsigned long)pgd); |
71 | } | 99 | } |
72 | 100 | ||
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index def903287193..31e83c3bd022 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h | |||
@@ -420,6 +420,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
420 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) | 420 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) |
421 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | 421 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
422 | 422 | ||
423 | extern spinlock_t pgd_lock; | ||
424 | extern struct page *pgd_list; | ||
425 | void vmalloc_sync_all(void); | ||
426 | |||
423 | #endif /* !__ASSEMBLY__ */ | 427 | #endif /* !__ASSEMBLY__ */ |
424 | 428 | ||
425 | extern int kern_addr_valid(unsigned long addr); | 429 | extern int kern_addr_valid(unsigned long addr); |