aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/mm/fault_32.c116
-rw-r--r--arch/x86/mm/fault_64.c148
2 files changed, 263 insertions, 1 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 28ea3d3ec8f8..7d9ecbbba745 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -173,8 +173,17 @@ static void force_sig_info_fault(int si_signo, int si_code,
173 force_sig_info(si_signo, &info, tsk); 173 force_sig_info(si_signo, &info, tsk);
174} 174}
175 175
176#ifdef CONFIG_X86_64
177static int bad_address(void *p)
178{
179 unsigned long dummy;
180 return probe_kernel_address((unsigned long *)p, dummy);
181}
182#endif
183
176void dump_pagetable(unsigned long address) 184void dump_pagetable(unsigned long address)
177{ 185{
186#ifdef CONFIG_X86_32
178 __typeof__(pte_val(__pte(0))) page; 187 __typeof__(pte_val(__pte(0))) page;
179 188
180 page = read_cr3(); 189 page = read_cr3();
@@ -209,8 +218,42 @@ void dump_pagetable(unsigned long address)
209 } 218 }
210 219
211 printk("\n"); 220 printk("\n");
221#else /* CONFIG_X86_64 */
222 pgd_t *pgd;
223 pud_t *pud;
224 pmd_t *pmd;
225 pte_t *pte;
226
227 pgd = (pgd_t *)read_cr3();
228
229 pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
230 pgd += pgd_index(address);
231 if (bad_address(pgd)) goto bad;
232 printk("PGD %lx ", pgd_val(*pgd));
233 if (!pgd_present(*pgd)) goto ret;
234
235 pud = pud_offset(pgd, address);
236 if (bad_address(pud)) goto bad;
237 printk("PUD %lx ", pud_val(*pud));
238 if (!pud_present(*pud)) goto ret;
239
240 pmd = pmd_offset(pud, address);
241 if (bad_address(pmd)) goto bad;
242 printk("PMD %lx ", pmd_val(*pmd));
243 if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
244
245 pte = pte_offset_kernel(pmd, address);
246 if (bad_address(pte)) goto bad;
247 printk("PTE %lx", pte_val(*pte));
248ret:
249 printk("\n");
250 return;
251bad:
252 printk("BAD\n");
253#endif
212} 254}
213 255
256#ifdef CONFIG_X86_32
214static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) 257static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
215{ 258{
216 unsigned index = pgd_index(address); 259 unsigned index = pgd_index(address);
@@ -246,6 +289,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
246 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); 289 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
247 return pmd_k; 290 return pmd_k;
248} 291}
292#endif
249 293
250#ifdef CONFIG_X86_64 294#ifdef CONFIG_X86_64
251static const char errata93_warning[] = 295static const char errata93_warning[] =
@@ -326,6 +370,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
326static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, 370static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
327 unsigned long address) 371 unsigned long address)
328{ 372{
373#ifdef CONFIG_X86_32
329 if (!oops_may_print()) 374 if (!oops_may_print())
330 return; 375 return;
331 376
@@ -350,8 +395,40 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
350 printk(KERN_ALERT "IP:"); 395 printk(KERN_ALERT "IP:");
351 printk_address(regs->ip, 1); 396 printk_address(regs->ip, 1);
352 dump_pagetable(address); 397 dump_pagetable(address);
398#else /* CONFIG_X86_64 */
399 printk(KERN_ALERT "BUG: unable to handle kernel ");
400 if (address < PAGE_SIZE)
401 printk(KERN_CONT "NULL pointer dereference");
402 else
403 printk(KERN_CONT "paging request");
404 printk(KERN_CONT " at %016lx\n", address);
405
406 printk(KERN_ALERT "IP:");
407 printk_address(regs->ip, 1);
408 dump_pagetable(address);
409#endif
353} 410}
354 411
412#ifdef CONFIG_X86_64
413static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
414 unsigned long error_code)
415{
416 unsigned long flags = oops_begin();
417 struct task_struct *tsk;
418
419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
420 current->comm, address);
421 dump_pagetable(address);
422 tsk = current;
423 tsk->thread.cr2 = address;
424 tsk->thread.trap_no = 14;
425 tsk->thread.error_code = error_code;
426 if (__die("Bad pagetable", regs, error_code))
427 regs = NULL;
428 oops_end(flags, regs, SIGKILL);
429}
430#endif
431
355/* 432/*
356 * Handle a fault on the vmalloc or module mapping area 433 * Handle a fault on the vmalloc or module mapping area
357 * 434 *
@@ -706,6 +783,7 @@ do_sigbus:
706 783
707void vmalloc_sync_all(void) 784void vmalloc_sync_all(void)
708{ 785{
786#ifdef CONFIG_X86_32
709 /* 787 /*
710 * Note that races in the updates of insync and start aren't 788 * Note that races in the updates of insync and start aren't
711 * problematic: insync can only get set bits added, and updates to 789 * problematic: insync can only get set bits added, and updates to
@@ -740,4 +818,42 @@ void vmalloc_sync_all(void)
740 if (address == start && test_bit(pgd_index(address), insync)) 818 if (address == start && test_bit(pgd_index(address), insync))
741 start = address + PGDIR_SIZE; 819 start = address + PGDIR_SIZE;
742 } 820 }
821#else /* CONFIG_X86_64 */
822 /*
823 * Note that races in the updates of insync and start aren't
824 * problematic: insync can only get set bits added, and updates to
825 * start are only improving performance (without affecting correctness
826 * if undone).
827 */
828 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
829 static unsigned long start = VMALLOC_START & PGDIR_MASK;
830 unsigned long address;
831
832 for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
833 if (!test_bit(pgd_index(address), insync)) {
834 const pgd_t *pgd_ref = pgd_offset_k(address);
835 struct page *page;
836
837 if (pgd_none(*pgd_ref))
838 continue;
839 spin_lock(&pgd_lock);
840 list_for_each_entry(page, &pgd_list, lru) {
841 pgd_t *pgd;
842 pgd = (pgd_t *)page_address(page) + pgd_index(address);
843 if (pgd_none(*pgd))
844 set_pgd(pgd, *pgd_ref);
845 else
846 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
847 }
848 spin_unlock(&pgd_lock);
849 set_bit(pgd_index(address), insync);
850 }
851 if (address == start)
852 start = address + PGDIR_SIZE;
853 }
854 /* Check that there is no need to do the same for the modules area. */
855 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
856 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
857 (__START_KERNEL & PGDIR_MASK)));
858#endif
743} 859}
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index e12c34ba4ba1..edca689c62d5 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -176,14 +176,52 @@ static void force_sig_info_fault(int si_signo, int si_code,
176 force_sig_info(si_signo, &info, tsk); 176 force_sig_info(si_signo, &info, tsk);
177} 177}
178 178
179#ifdef CONFIG_X86_64
179static int bad_address(void *p) 180static int bad_address(void *p)
180{ 181{
181 unsigned long dummy; 182 unsigned long dummy;
182 return probe_kernel_address((unsigned long *)p, dummy); 183 return probe_kernel_address((unsigned long *)p, dummy);
183} 184}
185#endif
184 186
185void dump_pagetable(unsigned long address) 187void dump_pagetable(unsigned long address)
186{ 188{
189#ifdef CONFIG_X86_32
190 __typeof__(pte_val(__pte(0))) page;
191
192 page = read_cr3();
193 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
194#ifdef CONFIG_X86_PAE
195 printk("*pdpt = %016Lx ", page);
196 if ((page >> PAGE_SHIFT) < max_low_pfn
197 && page & _PAGE_PRESENT) {
198 page &= PAGE_MASK;
199 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
200 & (PTRS_PER_PMD - 1)];
201 printk(KERN_CONT "*pde = %016Lx ", page);
202 page &= ~_PAGE_NX;
203 }
204#else
205 printk("*pde = %08lx ", page);
206#endif
207
208 /*
209 * We must not directly access the pte in the highpte
210 * case if the page table is located in highmem.
211 * And let's rather not kmap-atomic the pte, just in case
212 * it's allocated already.
213 */
214 if ((page >> PAGE_SHIFT) < max_low_pfn
215 && (page & _PAGE_PRESENT)
216 && !(page & _PAGE_PSE)) {
217 page &= PAGE_MASK;
218 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
219 & (PTRS_PER_PTE - 1)];
220 printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
221 }
222
223 printk("\n");
224#else /* CONFIG_X86_64 */
187 pgd_t *pgd; 225 pgd_t *pgd;
188 pud_t *pud; 226 pud_t *pud;
189 pmd_t *pmd; 227 pmd_t *pmd;
@@ -215,7 +253,46 @@ ret:
215 return; 253 return;
216bad: 254bad:
217 printk("BAD\n"); 255 printk("BAD\n");
256#endif
257}
258
259#ifdef CONFIG_X86_32
260static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
261{
262 unsigned index = pgd_index(address);
263 pgd_t *pgd_k;
264 pud_t *pud, *pud_k;
265 pmd_t *pmd, *pmd_k;
266
267 pgd += index;
268 pgd_k = init_mm.pgd + index;
269
270 if (!pgd_present(*pgd_k))
271 return NULL;
272
273 /*
274 * set_pgd(pgd, *pgd_k); here would be useless on PAE
275 * and redundant with the set_pmd() on non-PAE. As would
276 * set_pud.
277 */
278
279 pud = pud_offset(pgd, address);
280 pud_k = pud_offset(pgd_k, address);
281 if (!pud_present(*pud_k))
282 return NULL;
283
284 pmd = pmd_offset(pud, address);
285 pmd_k = pmd_offset(pud_k, address);
286 if (!pmd_present(*pmd_k))
287 return NULL;
288 if (!pmd_present(*pmd)) {
289 set_pmd(pmd, *pmd_k);
290 arch_flush_lazy_mmu_mode();
291 } else
292 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
293 return pmd_k;
218} 294}
295#endif
219 296
220#ifdef CONFIG_X86_64 297#ifdef CONFIG_X86_64
221static const char errata93_warning[] = 298static const char errata93_warning[] =
@@ -296,6 +373,32 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
296static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, 373static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
297 unsigned long address) 374 unsigned long address)
298{ 375{
376#ifdef CONFIG_X86_32
377 if (!oops_may_print())
378 return;
379
380#ifdef CONFIG_X86_PAE
381 if (error_code & PF_INSTR) {
382 int level;
383 pte_t *pte = lookup_address(address, &level);
384
385 if (pte && pte_present(*pte) && !pte_exec(*pte))
386 printk(KERN_CRIT "kernel tried to execute "
387 "NX-protected page - exploit attempt? "
388 "(uid: %d)\n", current->uid);
389 }
390#endif
391 printk(KERN_ALERT "BUG: unable to handle kernel ");
392 if (address < PAGE_SIZE)
393 printk(KERN_CONT "NULL pointer dereference");
394 else
395 printk(KERN_CONT "paging request");
396 printk(KERN_CONT " at %08lx\n", address);
397
398 printk(KERN_ALERT "IP:");
399 printk_address(regs->ip, 1);
400 dump_pagetable(address);
401#else /* CONFIG_X86_64 */
299 printk(KERN_ALERT "BUG: unable to handle kernel "); 402 printk(KERN_ALERT "BUG: unable to handle kernel ");
300 if (address < PAGE_SIZE) 403 if (address < PAGE_SIZE)
301 printk(KERN_CONT "NULL pointer dereference"); 404 printk(KERN_CONT "NULL pointer dereference");
@@ -306,8 +409,10 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
306 printk(KERN_ALERT "IP:"); 409 printk(KERN_ALERT "IP:");
307 printk_address(regs->ip, 1); 410 printk_address(regs->ip, 1);
308 dump_pagetable(address); 411 dump_pagetable(address);
412#endif
309} 413}
310 414
415#ifdef CONFIG_X86_64
311static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, 416static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
312 unsigned long error_code) 417 unsigned long error_code)
313{ 418{
@@ -325,6 +430,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
325 regs = NULL; 430 regs = NULL;
326 oops_end(flags, regs, SIGKILL); 431 oops_end(flags, regs, SIGKILL);
327} 432}
433#endif
328 434
329/* 435/*
330 * Handle a fault on the vmalloc area 436 * Handle a fault on the vmalloc area
@@ -590,12 +696,15 @@ bad_area:
590bad_area_nosemaphore: 696bad_area_nosemaphore:
591 /* User mode accesses just cause a SIGSEGV */ 697 /* User mode accesses just cause a SIGSEGV */
592 if (error_code & PF_USER) { 698 if (error_code & PF_USER) {
593
594 /* 699 /*
595 * It's possible to have interrupts off here. 700 * It's possible to have interrupts off here.
596 */ 701 */
597 local_irq_enable(); 702 local_irq_enable();
598 703
704 /*
705 * Valid to do another page fault here because this one came
706 * from user space.
707 */
599 if (is_prefetch(regs, address, error_code)) 708 if (is_prefetch(regs, address, error_code))
600 return; 709 return;
601 710
@@ -696,6 +805,42 @@ LIST_HEAD(pgd_list);
696 805
697void vmalloc_sync_all(void) 806void vmalloc_sync_all(void)
698{ 807{
808#ifdef CONFIG_X86_32
809 /*
810 * Note that races in the updates of insync and start aren't
811 * problematic: insync can only get set bits added, and updates to
812 * start are only improving performance (without affecting correctness
813 * if undone).
814 */
815 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
816 static unsigned long start = TASK_SIZE;
817 unsigned long address;
818
819 if (SHARED_KERNEL_PMD)
820 return;
821
822 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
823 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
824 if (!test_bit(pgd_index(address), insync)) {
825 unsigned long flags;
826 struct page *page;
827
828 spin_lock_irqsave(&pgd_lock, flags);
829 for (page = pgd_list; page; page =
830 (struct page *)page->index)
831 if (!vmalloc_sync_one(page_address(page),
832 address)) {
833 BUG_ON(page != pgd_list);
834 break;
835 }
836 spin_unlock_irqrestore(&pgd_lock, flags);
837 if (!page)
838 set_bit(pgd_index(address), insync);
839 }
840 if (address == start && test_bit(pgd_index(address), insync))
841 start = address + PGDIR_SIZE;
842 }
843#else /* CONFIG_X86_64 */
699 /* 844 /*
700 * Note that races in the updates of insync and start aren't 845 * Note that races in the updates of insync and start aren't
701 * problematic: insync can only get set bits added, and updates to 846 * problematic: insync can only get set bits added, and updates to
@@ -732,4 +877,5 @@ void vmalloc_sync_all(void)
732 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); 877 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
733 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 878 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
734 (__START_KERNEL & PGDIR_MASK))); 879 (__START_KERNEL & PGDIR_MASK)));
880#endif
735} 881}