aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/mm')
-rw-r--r--arch/i386/mm/fault.c210
-rw-r--r--arch/i386/mm/hugetlbpage.c12
-rw-r--r--arch/i386/mm/init.c47
-rw-r--r--arch/i386/mm/pageattr.c20
4 files changed, 174 insertions, 115 deletions
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index cf572d9a3b6e..7f0fcf219a26 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
214 214
215fastcall void do_invalid_op(struct pt_regs *, unsigned long); 215fastcall void do_invalid_op(struct pt_regs *, unsigned long);
216 216
217static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
218{
219 unsigned index = pgd_index(address);
220 pgd_t *pgd_k;
221 pud_t *pud, *pud_k;
222 pmd_t *pmd, *pmd_k;
223
224 pgd += index;
225 pgd_k = init_mm.pgd + index;
226
227 if (!pgd_present(*pgd_k))
228 return NULL;
229
230 /*
231 * set_pgd(pgd, *pgd_k); here would be useless on PAE
232 * and redundant with the set_pmd() on non-PAE. As would
233 * set_pud.
234 */
235
236 pud = pud_offset(pgd, address);
237 pud_k = pud_offset(pgd_k, address);
238 if (!pud_present(*pud_k))
239 return NULL;
240
241 pmd = pmd_offset(pud, address);
242 pmd_k = pmd_offset(pud_k, address);
243 if (!pmd_present(*pmd_k))
244 return NULL;
245 if (!pmd_present(*pmd))
246 set_pmd(pmd, *pmd_k);
247 else
248 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
249 return pmd_k;
250}
251
252/*
253 * Handle a fault on the vmalloc or module mapping area
254 *
255 * This assumes no large pages in there.
256 */
257static inline int vmalloc_fault(unsigned long address)
258{
259 unsigned long pgd_paddr;
260 pmd_t *pmd_k;
261 pte_t *pte_k;
262 /*
263 * Synchronize this task's top level page-table
264 * with the 'reference' page table.
265 *
266 * Do _not_ use "current" here. We might be inside
267 * an interrupt in the middle of a task switch..
268 */
269 pgd_paddr = read_cr3();
270 pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
271 if (!pmd_k)
272 return -1;
273 pte_k = pte_offset_kernel(pmd_k, address);
274 if (!pte_present(*pte_k))
275 return -1;
276 return 0;
277}
278
217/* 279/*
218 * This routine handles page faults. It determines the address, 280 * This routine handles page faults. It determines the address,
219 * and the problem, and then passes it off to one of the appropriate 281 * and the problem, and then passes it off to one of the appropriate
@@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long);
223 * bit 0 == 0 means no page found, 1 means protection fault 285 * bit 0 == 0 means no page found, 1 means protection fault
224 * bit 1 == 0 means read, 1 means write 286 * bit 1 == 0 means read, 1 means write
225 * bit 2 == 0 means kernel, 1 means user-mode 287 * bit 2 == 0 means kernel, 1 means user-mode
288 * bit 3 == 1 means use of reserved bit detected
289 * bit 4 == 1 means fault was an instruction fetch
226 */ 290 */
227fastcall void __kprobes do_page_fault(struct pt_regs *regs, 291fastcall void __kprobes do_page_fault(struct pt_regs *regs,
228 unsigned long error_code) 292 unsigned long error_code)
@@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
237 /* get the address */ 301 /* get the address */
238 address = read_cr2(); 302 address = read_cr2();
239 303
240 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
241 SIGSEGV) == NOTIFY_STOP)
242 return;
243 /* It's safe to allow irq's after cr2 has been saved */
244 if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
245 local_irq_enable();
246
247 tsk = current; 304 tsk = current;
248 305
249 si_code = SEGV_MAPERR; 306 si_code = SEGV_MAPERR;
@@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
259 * 316 *
260 * This verifies that the fault happens in kernel space 317 * This verifies that the fault happens in kernel space
261 * (error_code & 4) == 0, and that the fault was not a 318 * (error_code & 4) == 0, and that the fault was not a
262 * protection error (error_code & 1) == 0. 319 * protection error (error_code & 9) == 0.
263 */ 320 */
264 if (unlikely(address >= TASK_SIZE)) { 321 if (unlikely(address >= TASK_SIZE)) {
265 if (!(error_code & 5)) 322 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
266 goto vmalloc_fault; 323 return;
267 /* 324 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
325 SIGSEGV) == NOTIFY_STOP)
326 return;
327 /*
268 * Don't take the mm semaphore here. If we fixup a prefetch 328 * Don't take the mm semaphore here. If we fixup a prefetch
269 * fault we could otherwise deadlock. 329 * fault we could otherwise deadlock.
270 */ 330 */
271 goto bad_area_nosemaphore; 331 goto bad_area_nosemaphore;
272 } 332 }
333
334 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
335 SIGSEGV) == NOTIFY_STOP)
336 return;
337
338 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
339 fault has been handled. */
340 if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
341 local_irq_enable();
273 342
274 mm = tsk->mm; 343 mm = tsk->mm;
275 344
@@ -440,24 +509,31 @@ no_context:
440 509
441 bust_spinlocks(1); 510 bust_spinlocks(1);
442 511
443#ifdef CONFIG_X86_PAE 512 if (oops_may_print()) {
444 if (error_code & 16) { 513 #ifdef CONFIG_X86_PAE
445 pte_t *pte = lookup_address(address); 514 if (error_code & 16) {
515 pte_t *pte = lookup_address(address);
446 516
447 if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) 517 if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
448 printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid); 518 printk(KERN_CRIT "kernel tried to execute "
519 "NX-protected page - exploit attempt? "
520 "(uid: %d)\n", current->uid);
521 }
522 #endif
523 if (address < PAGE_SIZE)
524 printk(KERN_ALERT "BUG: unable to handle kernel NULL "
525 "pointer dereference");
526 else
527 printk(KERN_ALERT "BUG: unable to handle kernel paging"
528 " request");
529 printk(" at virtual address %08lx\n",address);
530 printk(KERN_ALERT " printing eip:\n");
531 printk("%08lx\n", regs->eip);
449 } 532 }
450#endif
451 if (address < PAGE_SIZE)
452 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
453 else
454 printk(KERN_ALERT "Unable to handle kernel paging request");
455 printk(" at virtual address %08lx\n",address);
456 printk(KERN_ALERT " printing eip:\n");
457 printk("%08lx\n", regs->eip);
458 page = read_cr3(); 533 page = read_cr3();
459 page = ((unsigned long *) __va(page))[address >> 22]; 534 page = ((unsigned long *) __va(page))[address >> 22];
460 printk(KERN_ALERT "*pde = %08lx\n", page); 535 if (oops_may_print())
536 printk(KERN_ALERT "*pde = %08lx\n", page);
461 /* 537 /*
462 * We must not directly access the pte in the highpte 538 * We must not directly access the pte in the highpte
463 * case, the page table might be allocated in highmem. 539 * case, the page table might be allocated in highmem.
@@ -465,7 +541,7 @@ no_context:
465 * it's allocated already. 541 * it's allocated already.
466 */ 542 */
467#ifndef CONFIG_HIGHPTE 543#ifndef CONFIG_HIGHPTE
468 if (page & 1) { 544 if ((page & 1) && oops_may_print()) {
469 page &= PAGE_MASK; 545 page &= PAGE_MASK;
470 address &= 0x003ff000; 546 address &= 0x003ff000;
471 page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; 547 page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
@@ -510,51 +586,41 @@ do_sigbus:
510 tsk->thread.error_code = error_code; 586 tsk->thread.error_code = error_code;
511 tsk->thread.trap_no = 14; 587 tsk->thread.trap_no = 14;
512 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 588 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
513 return; 589}
514
515vmalloc_fault:
516 {
517 /*
518 * Synchronize this task's top level page-table
519 * with the 'reference' page table.
520 *
521 * Do _not_ use "tsk" here. We might be inside
522 * an interrupt in the middle of a task switch..
523 */
524 int index = pgd_index(address);
525 unsigned long pgd_paddr;
526 pgd_t *pgd, *pgd_k;
527 pud_t *pud, *pud_k;
528 pmd_t *pmd, *pmd_k;
529 pte_t *pte_k;
530
531 pgd_paddr = read_cr3();
532 pgd = index + (pgd_t *)__va(pgd_paddr);
533 pgd_k = init_mm.pgd + index;
534
535 if (!pgd_present(*pgd_k))
536 goto no_context;
537
538 /*
539 * set_pgd(pgd, *pgd_k); here would be useless on PAE
540 * and redundant with the set_pmd() on non-PAE. As would
541 * set_pud.
542 */
543 590
544 pud = pud_offset(pgd, address); 591#ifndef CONFIG_X86_PAE
545 pud_k = pud_offset(pgd_k, address); 592void vmalloc_sync_all(void)
546 if (!pud_present(*pud_k)) 593{
547 goto no_context; 594 /*
548 595 * Note that races in the updates of insync and start aren't
549 pmd = pmd_offset(pud, address); 596 * problematic: insync can only get set bits added, and updates to
550 pmd_k = pmd_offset(pud_k, address); 597 * start are only improving performance (without affecting correctness
551 if (!pmd_present(*pmd_k)) 598 * if undone).
552 goto no_context; 599 */
553 set_pmd(pmd, *pmd_k); 600 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
601 static unsigned long start = TASK_SIZE;
602 unsigned long address;
554 603
555 pte_k = pte_offset_kernel(pmd_k, address); 604 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
556 if (!pte_present(*pte_k)) 605 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
557 goto no_context; 606 if (!test_bit(pgd_index(address), insync)) {
558 return; 607 unsigned long flags;
608 struct page *page;
609
610 spin_lock_irqsave(&pgd_lock, flags);
611 for (page = pgd_list; page; page =
612 (struct page *)page->index)
613 if (!vmalloc_sync_one(page_address(page),
614 address)) {
615 BUG_ON(page != pgd_list);
616 break;
617 }
618 spin_unlock_irqrestore(&pgd_lock, flags);
619 if (!page)
620 set_bit(pgd_index(address), insync);
621 }
622 if (address == start && test_bit(pgd_index(address), insync))
623 start = address + PGDIR_SIZE;
559 } 624 }
560} 625}
626#endif
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index d524127c9afc..a7d891585411 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -48,18 +48,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
48 return (pte_t *) pmd; 48 return (pte_t *) pmd;
49} 49}
50 50
51/*
52 * This function checks for proper alignment of input addr and len parameters.
53 */
54int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
55{
56 if (len & ~HPAGE_MASK)
57 return -EINVAL;
58 if (addr & ~HPAGE_MASK)
59 return -EINVAL;
60 return 0;
61}
62
63#if 0 /* This is just for testing */ 51#if 0 /* This is just for testing */
64struct page * 52struct page *
65follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 53follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 2700f01994ba..9f66ac582a8b 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -270,7 +270,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
270 270
271static void __meminit free_new_highpage(struct page *page) 271static void __meminit free_new_highpage(struct page *page)
272{ 272{
273 set_page_count(page, 1); 273 init_page_count(page);
274 __free_page(page); 274 __free_page(page);
275 totalhigh_pages++; 275 totalhigh_pages++;
276} 276}
@@ -720,21 +720,6 @@ static int noinline do_test_wp_bit(void)
720 return flag; 720 return flag;
721} 721}
722 722
723void free_initmem(void)
724{
725 unsigned long addr;
726
727 addr = (unsigned long)(&__init_begin);
728 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
729 ClearPageReserved(virt_to_page(addr));
730 set_page_count(virt_to_page(addr), 1);
731 memset((void *)addr, 0xcc, PAGE_SIZE);
732 free_page(addr);
733 totalram_pages++;
734 }
735 printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
736}
737
738#ifdef CONFIG_DEBUG_RODATA 723#ifdef CONFIG_DEBUG_RODATA
739 724
740extern char __start_rodata, __end_rodata; 725extern char __start_rodata, __end_rodata;
@@ -758,17 +743,31 @@ void mark_rodata_ro(void)
758} 743}
759#endif 744#endif
760 745
746void free_init_pages(char *what, unsigned long begin, unsigned long end)
747{
748 unsigned long addr;
749
750 for (addr = begin; addr < end; addr += PAGE_SIZE) {
751 ClearPageReserved(virt_to_page(addr));
752 init_page_count(virt_to_page(addr));
753 memset((void *)addr, 0xcc, PAGE_SIZE);
754 free_page(addr);
755 totalram_pages++;
756 }
757 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
758}
759
760void free_initmem(void)
761{
762 free_init_pages("unused kernel memory",
763 (unsigned long)(&__init_begin),
764 (unsigned long)(&__init_end));
765}
761 766
762#ifdef CONFIG_BLK_DEV_INITRD 767#ifdef CONFIG_BLK_DEV_INITRD
763void free_initrd_mem(unsigned long start, unsigned long end) 768void free_initrd_mem(unsigned long start, unsigned long end)
764{ 769{
765 if (start < end) 770 free_init_pages("initrd memory", start, end);
766 printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
767 for (; start < end; start += PAGE_SIZE) {
768 ClearPageReserved(virt_to_page(start));
769 set_page_count(virt_to_page(start), 1);
770 free_page(start);
771 totalram_pages++;
772 }
773} 771}
774#endif 772#endif
773
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index d0cadb33b54c..92c3d9f0e731 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -51,6 +51,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
51 if (!base) 51 if (!base)
52 return NULL; 52 return NULL;
53 53
54 /*
55 * page_private is used to track the number of entries in
56 * the page table page that have non standard attributes.
57 */
58 SetPagePrivate(base);
59 page_private(base) = 0;
60
54 address = __pa(address); 61 address = __pa(address);
55 addr = address & LARGE_PAGE_MASK; 62 addr = address & LARGE_PAGE_MASK;
56 pbase = (pte_t *)page_address(base); 63 pbase = (pte_t *)page_address(base);
@@ -143,11 +150,12 @@ __change_page_attr(struct page *page, pgprot_t prot)
143 return -ENOMEM; 150 return -ENOMEM;
144 set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); 151 set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
145 kpte_page = split; 152 kpte_page = split;
146 } 153 }
147 get_page(kpte_page); 154 page_private(kpte_page)++;
148 } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 155 } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
149 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); 156 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
150 __put_page(kpte_page); 157 BUG_ON(page_private(kpte_page) == 0);
158 page_private(kpte_page)--;
151 } else 159 } else
152 BUG(); 160 BUG();
153 161
@@ -157,10 +165,8 @@ __change_page_attr(struct page *page, pgprot_t prot)
157 * replace it with a largepage. 165 * replace it with a largepage.
158 */ 166 */
159 if (!PageReserved(kpte_page)) { 167 if (!PageReserved(kpte_page)) {
160 /* memleak and potential failed 2M page regeneration */ 168 if (cpu_has_pse && (page_private(kpte_page) == 0)) {
161 BUG_ON(!page_count(kpte_page)); 169 ClearPagePrivate(kpte_page);
162
163 if (cpu_has_pse && (page_count(kpte_page) == 1)) {
164 list_add(&kpte_page->lru, &df_list); 170 list_add(&kpte_page->lru, &df_list);
165 revert_page(kpte_page, address); 171 revert_page(kpte_page, address);
166 } 172 }