Diffstat (limited to 'arch/i386/mm')

 arch/i386/mm/fault.c    |  60
 arch/i386/mm/highmem.c  |  10
 arch/i386/mm/init.c     | 196
 arch/i386/mm/pageattr.c |   6
 arch/i386/mm/pgtable.c  |  94
 5 files changed, 252 insertions(+), 114 deletions(-)
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index b8c4e259fc8b..f534c29e80b2 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -20,6 +20,7 @@
 #include <linux/tty.h>
 #include <linux/vt_kern.h>              /* For unblank_screen() */
 #include <linux/highmem.h>
+#include <linux/bootmem.h>              /* for max_low_pfn */
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
@@ -301,7 +302,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
         struct mm_struct *mm;
         struct vm_area_struct * vma;
         unsigned long address;
-        unsigned long page;
         int write, si_code;
 
         /* get the address */
@@ -510,7 +510,9 @@ no_context:
         bust_spinlocks(1);
 
         if (oops_may_print()) {
+                __typeof__(pte_val(__pte(0))) page;
+
 #ifdef CONFIG_X86_PAE
                 if (error_code & 16) {
                         pte_t *pte = lookup_address(address);
 
@@ -519,7 +521,7 @@ no_context:
                                         "NX-protected page - exploit attempt? "
                                         "(uid: %d)\n", current->uid);
                 }
 #endif
                 if (address < PAGE_SIZE)
                         printk(KERN_ALERT "BUG: unable to handle kernel NULL "
                                         "pointer dereference");
@@ -529,25 +531,38 @@ no_context:
                 printk(" at virtual address %08lx\n",address);
                 printk(KERN_ALERT " printing eip:\n");
                 printk("%08lx\n", regs->eip);
-        }
-        page = read_cr3();
-        page = ((unsigned long *) __va(page))[address >> 22];
-        if (oops_may_print())
+
+                page = read_cr3();
+                page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+                printk(KERN_ALERT "*pdpt = %016Lx\n", page);
+                if ((page >> PAGE_SHIFT) < max_low_pfn
+                    && page & _PAGE_PRESENT) {
+                        page &= PAGE_MASK;
+                        page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+                                                                 & (PTRS_PER_PMD - 1)];
+                        printk(KERN_ALERT "*pde = %016Lx\n", page);
+                        page &= ~_PAGE_NX;
+                }
+#else
                 printk(KERN_ALERT "*pde = %08lx\n", page);
-        /*
-         * We must not directly access the pte in the highpte
-         * case, the page table might be allocated in highmem.
-         * And lets rather not kmap-atomic the pte, just in case
-         * it's allocated already.
-         */
-#ifndef CONFIG_HIGHPTE
-        if ((page & 1) && oops_may_print()) {
-                page &= PAGE_MASK;
-                address &= 0x003ff000;
-                page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-                printk(KERN_ALERT "*pte = %08lx\n", page);
-        }
 #endif
+
+                /*
+                 * We must not directly access the pte in the highpte
+                 * case if the page table is located in highmem.
+                 * And let's rather not kmap-atomic the pte, just in case
+                 * it's allocated already.
+                 */
+                if ((page >> PAGE_SHIFT) < max_low_pfn
+                    && (page & _PAGE_PRESENT)) {
+                        page &= PAGE_MASK;
+                        page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+                                                                 & (PTRS_PER_PTE - 1)];
+                        printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page);
+                }
+        }
+
         tsk->thread.cr2 = address;
         tsk->thread.trap_no = 14;
         tsk->thread.error_code = error_code;
@@ -588,7 +603,6 @@ do_sigbus:
         force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#ifndef CONFIG_X86_PAE
 void vmalloc_sync_all(void)
 {
         /*
@@ -601,6 +615,9 @@ void vmalloc_sync_all(void)
         static unsigned long start = TASK_SIZE;
         unsigned long address;
 
+        if (SHARED_KERNEL_PMD)
+                return;
+
         BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
         for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
                 if (!test_bit(pgd_index(address), insync)) {
@@ -623,4 +640,3 @@ void vmalloc_sync_all(void)
                         start = address + PGDIR_SIZE;
                 }
         }
-#endif
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index ac70d09df7ee..ad8d86cc683e 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
  * However when holding an atomic kmap is is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-void *kmap_atomic(struct page *page, enum km_type type)
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 {
         enum fixed_addresses idx;
         unsigned long vaddr;
@@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type)
                 return page_address(page);
 
         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-        set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+        set_pte(kmap_pte-idx, mk_pte(page, prot));
         arch_flush_lazy_mmu_mode();
 
         return (void*) vaddr;
 }
 
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+        return kmap_atomic_prot(page, type, kmap_prot);
+}
+
 void kunmap_atomic(void *kvaddr, enum km_type type)
 {
         unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
@@ -67,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
         }
 
+        arch_flush_lazy_mmu_mode();
         pagefault_enable();
 }
 
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index ae436882af7a..dbe16f63a566 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
@@ -42,6 +43,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/paravirt.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
         pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
-        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-        paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
-        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-        pud = pud_offset(pgd, 0);
-        if (pmd_table != pmd_offset(pud, 0))
-                BUG();
-#else
+        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
+                pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+                paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+                pud = pud_offset(pgd, 0);
+                if (pmd_table != pmd_offset(pud, 0))
+                        BUG();
+        }
+#endif
         pud = pud_offset(pgd, 0);
         pmd_table = pmd_offset(pud, 0);
-#endif
-
         return pmd_table;
 }
 
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
  */
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
-        if (pmd_none(*pmd)) {
+        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
                 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
                 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-                if (page_table != pte_offset_kernel(pmd, 0))
-                        BUG();
-
-                return page_table;
+                BUG_ON(page_table != pte_offset_kernel(pmd, 0));
         }
 
         return pte_offset_kernel(pmd, 0);
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
         pgd_t *pgd;
-        pud_t *pud;
         pmd_t *pmd;
         int pgd_idx, pmd_idx;
         unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
         pgd = pgd_base + pgd_idx;
 
         for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-                if (pgd_none(*pgd))
-                        one_md_table_init(pgd);
-                pud = pud_offset(pgd, vaddr);
-                pmd = pmd_offset(pud, vaddr);
+                pmd = one_md_table_init(pgd);
+                pmd = pmd + pmd_index(vaddr);
                 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-                        if (pmd_none(*pmd))
-                                one_page_table_init(pmd);
+                        one_page_table_init(pmd);
 
                         vaddr += PMD_SIZE;
                 }
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                         /* Map with big pages if possible, otherwise create normal page tables. */
                         if (cpu_has_pse) {
                                 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
                                 if (is_kernel_text(address) || is_kernel_text(address2))
                                         set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                 else
                                         set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+
                                 pfn += PTRS_PER_PTE;
                         } else {
                                 pte = one_page_table_init(pmd);
 
-                                for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
-                                        if (is_kernel_text(address))
-                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-                                        else
-                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                                for (pte_ofs = 0;
+                                     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+                                     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+                                        if (is_kernel_text(address))
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+                                        else
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                 }
                         }
                 }
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
 #define remap_numa_kva() do {} while (0)
 #endif
 
-static void __init pagetable_init (void)
+void __init native_pagetable_setup_start(pgd_t *base)
 {
-        unsigned long vaddr;
-        pgd_t *pgd_base = swapper_pg_dir;
-
 #ifdef CONFIG_X86_PAE
         int i;
-        /* Init entries of the first-level page table to the zero page */
-        for (i = 0; i < PTRS_PER_PGD; i++)
-                set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+        /*
+         * Init entries of the first-level page table to the
+         * zero page, if they haven't already been set up.
+         *
+         * In a normal native boot, we'll be running on a
+         * pagetable rooted in swapper_pg_dir, but not in PAE
+         * mode, so this will end up clobbering the mappings
+         * for the lower 24Mbytes of the address space,
+         * without affecting the kernel address space.
+         */
+        for (i = 0; i < USER_PTRS_PER_PGD; i++)
+                set_pgd(&base[i],
+                        __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+        /* Make sure kernel address space is empty so that a pagetable
+           will be allocated for it. */
+        memset(&base[USER_PTRS_PER_PGD], 0,
+               KERNEL_PGD_PTRS * sizeof(pgd_t));
 #else
         paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
 #endif
+}
+
+void __init native_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+        /*
+         * Add low memory identity-mappings - SMP needs it when
+         * starting up on an AP from real-mode. In the non-PAE
+         * case we already have these mappings through head.S.
+         * All user-space mappings are explicitly cleared after
+         * SMP startup.
+         */
+        set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
+
+/*
+ * Build a proper pagetable for the kernel mappings.  Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE).  The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir.  In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
+static void __init pagetable_init (void)
+{
+        unsigned long vaddr, end;
+        pgd_t *pgd_base = swapper_pg_dir;
+
+        paravirt_pagetable_setup_start(pgd_base);
 
         /* Enable PSE if available */
-        if (cpu_has_pse) {
+        if (cpu_has_pse)
                 set_in_cr4(X86_CR4_PSE);
-        }
 
         /* Enable PGE if available */
         if (cpu_has_pge) {
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
          * created - mappings will be set by set_fixmap():
          */
         vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-        page_table_range_init(vaddr, 0, pgd_base);
+        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+        page_table_range_init(vaddr, end, pgd_base);
 
         permanent_kmaps_init(pgd_base);
 
-#ifdef CONFIG_X86_PAE
-        /*
-         * Add low memory identity-mappings - SMP needs it when
-         * starting up on an AP from real-mode. In the non-PAE
-         * case we already have these mappings through head.S.
-         * All user-space mappings are explicitly cleared after
-         * SMP startup.
-         */
-        set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
+        paravirt_pagetable_setup_done(pgd_base);
 }
 
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+        size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
         if (PTRS_PER_PMD > 1) {
                 pmd_cache = kmem_cache_create("pmd",
                                         PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
                                         NULL);
                 if (!pmd_cache)
                         panic("pgtable_cache_init(): cannot create pmd cache");
+
+                if (!SHARED_KERNEL_PMD) {
+                        /* If we're in PAE mode and have a non-shared
+                           kernel pmd, then the pgd size must be a
+                           page size.  This is because the pgd_list
+                           links through the page structure, so there
+                           can only be one pgd per page for this to
+                           work. */
+                        pgd_size = PAGE_SIZE;
+                }
         }
         pgd_cache = kmem_cache_create("pgd",
-                                PTRS_PER_PGD*sizeof(pgd_t),
-                                PTRS_PER_PGD*sizeof(pgd_t),
+                                pgd_size,
+                                pgd_size,
                                 0,
                                 pgd_ctor,
-                                PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+                                (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
         if (!pgd_cache)
                 panic("pgtable_cache_init(): Cannot create pgd cache");
 }
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)
 
 void mark_rodata_ro(void)
 {
-        unsigned long addr = (unsigned long)__start_rodata;
+        unsigned long start = PFN_ALIGN(_text);
+        unsigned long size = PFN_ALIGN(_etext) - start;
 
-        for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-                change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+        /* It must still be possible to apply SMP alternatives. */
+        if (num_possible_cpus() <= 1)
+#endif
+        {
+                change_page_attr(virt_to_page(start),
+                                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+                printk("Write protecting the kernel text: %luk\n", size >> 10);
+        }
 
-        printk("Write protecting the kernel read-only data: %uk\n",
-                        (__end_rodata - __start_rodata) >> 10);
+        start += size;
+        size = (unsigned long)__end_rodata - start;
+        change_page_attr(virt_to_page(start),
+                         size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+        printk("Write protecting the kernel read-only data: %luk\n",
+                size >> 10);
 
         /*
          * change_page_attr() requires a global_flush_tlb() call after it.
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
         unsigned long addr;
 
         for (addr = begin; addr < end; addr += PAGE_SIZE) {
-                ClearPageReserved(virt_to_page(addr));
-                init_page_count(virt_to_page(addr));
-                memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-                free_page(addr);
+                struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+                ClearPageReserved(page);
+                init_page_count(page);
+                memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+                __free_page(page);
                 totalram_pages++;
         }
-        printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 }
 
 void free_initmem(void)
 {
         free_init_pages("unused kernel memory",
-                        (unsigned long)(&__init_begin),
-                        (unsigned long)(&__init_end));
+                        __pa_symbol(&__init_begin),
+                        __pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-        free_init_pages("initrd memory", start, end);
+        free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
 
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 412ebbd8adb0..47bd477c8ecc 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
         unsigned long flags;
 
         set_pte_atomic(kpte, pte);      /* change init_mm */
-        if (PTRS_PER_PMD > 1)
+        if (SHARED_KERNEL_PMD)
                 return;
 
         spin_lock_irqsave(&pgd_lock, flags);
@@ -142,7 +142,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
                 return -EINVAL;
         kpte_page = virt_to_page(kpte);
         if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
-                if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
+                if (!pte_huge(*kpte)) {
                         set_pte_atomic(kpte, mk_pte(page, prot));
                 } else {
                         pgprot_t ref_prot;
@@ -158,7 +158,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
                         kpte_page = split;
                 }
                 page_private(kpte_page)++;
-        } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
+        } else if (!pte_huge(*kpte)) {
                 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
                 BUG_ON(page_private(kpte_page) == 0);
                 page_private(kpte_page)--;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index fa0cfbd551e1..9a96c1647428 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 }
 
 static int fixmaps;
-#ifndef CONFIG_COMPAT_VDSO
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
-#endif
 
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve)
         BUG_ON(fixmaps > 0);
         printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
                (int)-reserve);
-#ifdef CONFIG_COMPAT_VDSO
-        BUG_ON(reserve != 0);
-#else
         __FIXADDR_TOP = -reserve - PAGE_SIZE;
         __VMALLOC_RESERVE += reserve;
-#endif
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -238,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
         set_page_private(next, (unsigned long)pprev);
 }
 
+#if (PTRS_PER_PMD == 1)
+/* Non-PAE pgd constructor */
 void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
         unsigned long flags;
 
-        if (PTRS_PER_PMD == 1) {
-                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-                spin_lock_irqsave(&pgd_lock, flags);
-        }
+        /* !PAE, no pagetable sharing */
+        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+        spin_lock_irqsave(&pgd_lock, flags);
 
+        /* must happen under lock */
         clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                         swapper_pg_dir + USER_PTRS_PER_PGD,
                         KERNEL_PGD_PTRS);
-
-        if (PTRS_PER_PMD > 1)
-                return;
-
-        /* must happen under lock */
         paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
                         __pa(swapper_pg_dir) >> PAGE_SHIFT,
-                        USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
+                        USER_PTRS_PER_PGD,
+                        KERNEL_PGD_PTRS);
         pgd_list_add(pgd);
         spin_unlock_irqrestore(&pgd_lock, flags);
 }
+#else /* PTRS_PER_PMD > 1 */
+/* PAE pgd constructor */
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+{
+        /* PAE, kernel PMD may be shared */
+
+        if (SHARED_KERNEL_PMD) {
+                clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+                                swapper_pg_dir + USER_PTRS_PER_PGD,
+                                KERNEL_PGD_PTRS);
+        } else {
+                unsigned long flags;
+
+                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+                spin_lock_irqsave(&pgd_lock, flags);
+                pgd_list_add(pgd);
+                spin_unlock_irqrestore(&pgd_lock, flags);
+        }
+}
+#endif /* PTRS_PER_PMD */
 
-/* never called when PTRS_PER_PMD > 1 */
 void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
         unsigned long flags; /* can be called from interrupt context */
 
+        BUG_ON(SHARED_KERNEL_PMD);
+
         paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
         spin_lock_irqsave(&pgd_lock, flags);
         pgd_list_del(pgd);
         spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+#define UNSHARED_PTRS_PER_PGD \
+        (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+/* If we allocate a pmd for part of the kernel address space, then
+   make sure its initialized with the appropriate kernel mappings.
+   Otherwise use a cached zeroed pmd. */
+static pmd_t *pmd_cache_alloc(int idx)
+{
+        pmd_t *pmd;
+
+        if (idx >= USER_PTRS_PER_PGD) {
+                pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+                if (pmd)
+                        memcpy(pmd,
+                               (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+                               sizeof(pmd_t) * PTRS_PER_PMD);
+        } else
+                pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
+        return pmd;
+}
+
+static void pmd_cache_free(pmd_t *pmd, int idx)
+{
+        if (idx >= USER_PTRS_PER_PGD)
+                free_page((unsigned long)pmd);
+        else
+                kmem_cache_free(pmd_cache, pmd);
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
         int i;
@@ -282,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
         if (PTRS_PER_PMD == 1 || !pgd)
                 return pgd;
 
-        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-                pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+        for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
+                pmd_t *pmd = pmd_cache_alloc(i);
+
                 if (!pmd)
                         goto out_oom;
+
                 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
                 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
         }
@@ -296,7 +342,7 @@ out_oom:
                 pgd_t pgdent = pgd[i];
                 void* pmd = (void *)__va(pgd_val(pgdent)-1);
                 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-                kmem_cache_free(pmd_cache, pmd);
+                pmd_cache_free(pmd, i);
         }
         kmem_cache_free(pgd_cache, pgd);
         return NULL;
@@ -308,11 +354,11 @@ void pgd_free(pgd_t *pgd)
 
         /* in the PAE case user pgd entries are overwritten before usage */
         if (PTRS_PER_PMD > 1)
-                for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+                for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
                         pgd_t pgdent = pgd[i];
                         void* pmd = (void *)__va(pgd_val(pgdent)-1);
                         paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-                        kmem_cache_free(pmd_cache, pmd);
+                        pmd_cache_free(pmd, i);
                 }
         /* in the non-PAE case, free_pgtables() clears user pgd entries */
         kmem_cache_free(pgd_cache, pgd);