diff options
Diffstat (limited to 'arch/i386/mm/init.c')
-rw-r--r-- | arch/i386/mm/init.c | 196 |
1 files changed, 133 insertions, 63 deletions
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index ae436882af7a..dbe16f63a566 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/pagemap.h> | 24 | #include <linux/pagemap.h> |
25 | #include <linux/pfn.h> | ||
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
@@ -42,6 +43,7 @@ | |||
42 | #include <asm/tlb.h> | 43 | #include <asm/tlb.h> |
43 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
44 | #include <asm/sections.h> | 45 | #include <asm/sections.h> |
46 | #include <asm/paravirt.h> | ||
45 | 47 | ||
46 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 48 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
47 | 49 | ||
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
61 | pmd_t *pmd_table; | 63 | pmd_t *pmd_table; |
62 | 64 | ||
63 | #ifdef CONFIG_X86_PAE | 65 | #ifdef CONFIG_X86_PAE |
64 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 66 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
65 | paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); | 67 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
66 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 68 | |
67 | pud = pud_offset(pgd, 0); | 69 | paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
68 | if (pmd_table != pmd_offset(pud, 0)) | 70 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
69 | BUG(); | 71 | pud = pud_offset(pgd, 0); |
70 | #else | 72 | if (pmd_table != pmd_offset(pud, 0)) |
73 | BUG(); | ||
74 | } | ||
75 | #endif | ||
71 | pud = pud_offset(pgd, 0); | 76 | pud = pud_offset(pgd, 0); |
72 | pmd_table = pmd_offset(pud, 0); | 77 | pmd_table = pmd_offset(pud, 0); |
73 | #endif | ||
74 | |||
75 | return pmd_table; | 78 | return pmd_table; |
76 | } | 79 | } |
77 | 80 | ||
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
81 | */ | 84 | */ |
82 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 85 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
83 | { | 86 | { |
84 | if (pmd_none(*pmd)) { | 87 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
85 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 88 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
89 | |||
86 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); | 90 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); |
87 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 91 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
88 | if (page_table != pte_offset_kernel(pmd, 0)) | 92 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); |
89 | BUG(); | ||
90 | |||
91 | return page_table; | ||
92 | } | 93 | } |
93 | 94 | ||
94 | return pte_offset_kernel(pmd, 0); | 95 | return pte_offset_kernel(pmd, 0); |
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
108 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) | 109 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) |
109 | { | 110 | { |
110 | pgd_t *pgd; | 111 | pgd_t *pgd; |
111 | pud_t *pud; | ||
112 | pmd_t *pmd; | 112 | pmd_t *pmd; |
113 | int pgd_idx, pmd_idx; | 113 | int pgd_idx, pmd_idx; |
114 | unsigned long vaddr; | 114 | unsigned long vaddr; |
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end | |||
119 | pgd = pgd_base + pgd_idx; | 119 | pgd = pgd_base + pgd_idx; |
120 | 120 | ||
121 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | 121 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { |
122 | if (pgd_none(*pgd)) | 122 | pmd = one_md_table_init(pgd); |
123 | one_md_table_init(pgd); | 123 | pmd = pmd + pmd_index(vaddr); |
124 | pud = pud_offset(pgd, vaddr); | ||
125 | pmd = pmd_offset(pud, vaddr); | ||
126 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { | 124 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { |
127 | if (pmd_none(*pmd)) | 125 | one_page_table_init(pmd); |
128 | one_page_table_init(pmd); | ||
129 | 126 | ||
130 | vaddr += PMD_SIZE; | 127 | vaddr += PMD_SIZE; |
131 | } | 128 | } |
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
167 | /* Map with big pages if possible, otherwise create normal page tables. */ | 164 | /* Map with big pages if possible, otherwise create normal page tables. */ |
168 | if (cpu_has_pse) { | 165 | if (cpu_has_pse) { |
169 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; | 166 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; |
170 | |||
171 | if (is_kernel_text(address) || is_kernel_text(address2)) | 167 | if (is_kernel_text(address) || is_kernel_text(address2)) |
172 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); | 168 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); |
173 | else | 169 | else |
174 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); | 170 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); |
171 | |||
175 | pfn += PTRS_PER_PTE; | 172 | pfn += PTRS_PER_PTE; |
176 | } else { | 173 | } else { |
177 | pte = one_page_table_init(pmd); | 174 | pte = one_page_table_init(pmd); |
178 | 175 | ||
179 | for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { | 176 | for (pte_ofs = 0; |
180 | if (is_kernel_text(address)) | 177 | pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; |
181 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); | 178 | pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { |
182 | else | 179 | if (is_kernel_text(address)) |
183 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | 180 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); |
181 | else | ||
182 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | ||
184 | } | 183 | } |
185 | } | 184 | } |
186 | } | 185 | } |
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void); | |||
337 | #define remap_numa_kva() do {} while (0) | 336 | #define remap_numa_kva() do {} while (0) |
338 | #endif | 337 | #endif |
339 | 338 | ||
340 | static void __init pagetable_init (void) | 339 | void __init native_pagetable_setup_start(pgd_t *base) |
341 | { | 340 | { |
342 | unsigned long vaddr; | ||
343 | pgd_t *pgd_base = swapper_pg_dir; | ||
344 | |||
345 | #ifdef CONFIG_X86_PAE | 341 | #ifdef CONFIG_X86_PAE |
346 | int i; | 342 | int i; |
347 | /* Init entries of the first-level page table to the zero page */ | 343 | |
348 | for (i = 0; i < PTRS_PER_PGD; i++) | 344 | /* |
349 | set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | 345 | * Init entries of the first-level page table to the |
346 | * zero page, if they haven't already been set up. | ||
347 | * | ||
348 | * In a normal native boot, we'll be running on a | ||
349 | * pagetable rooted in swapper_pg_dir, but not in PAE | ||
350 | * mode, so this will end up clobbering the mappings | ||
351 | * for the lower 24Mbytes of the address space, | ||
352 | * without affecting the kernel address space. | ||
353 | */ | ||
354 | for (i = 0; i < USER_PTRS_PER_PGD; i++) | ||
355 | set_pgd(&base[i], | ||
356 | __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | ||
357 | |||
358 | /* Make sure kernel address space is empty so that a pagetable | ||
359 | will be allocated for it. */ | ||
360 | memset(&base[USER_PTRS_PER_PGD], 0, | ||
361 | KERNEL_PGD_PTRS * sizeof(pgd_t)); | ||
350 | #else | 362 | #else |
351 | paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); | 363 | paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); |
352 | #endif | 364 | #endif |
365 | } | ||
366 | |||
367 | void __init native_pagetable_setup_done(pgd_t *base) | ||
368 | { | ||
369 | #ifdef CONFIG_X86_PAE | ||
370 | /* | ||
371 | * Add low memory identity-mappings - SMP needs it when | ||
372 | * starting up on an AP from real-mode. In the non-PAE | ||
373 | * case we already have these mappings through head.S. | ||
374 | * All user-space mappings are explicitly cleared after | ||
375 | * SMP startup. | ||
376 | */ | ||
377 | set_pgd(&base[0], base[USER_PTRS_PER_PGD]); | ||
378 | #endif | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Build a proper pagetable for the kernel mappings. Up until this | ||
383 | * point, we've been running on some set of pagetables constructed by | ||
384 | * the boot process. | ||
385 | * | ||
386 | * If we're booting on native hardware, this will be a pagetable | ||
387 | * constructed in arch/i386/kernel/head.S, and not running in PAE mode | ||
388 | * (even if we'll end up running in PAE). The root of the pagetable | ||
389 | * will be swapper_pg_dir. | ||
390 | * | ||
391 | * If we're booting paravirtualized under a hypervisor, then there are | ||
392 | * more options: we may already be running PAE, and the pagetable may | ||
393 | * or may not be based in swapper_pg_dir. In any case, | ||
394 | * paravirt_pagetable_setup_start() will set up swapper_pg_dir | ||
395 | * appropriately for the rest of the initialization to work. | ||
396 | * | ||
397 | * In general, pagetable_init() assumes that the pagetable may already | ||
398 | * be partially populated, and so it avoids stomping on any existing | ||
399 | * mappings. | ||
400 | */ | ||
401 | static void __init pagetable_init (void) | ||
402 | { | ||
403 | unsigned long vaddr, end; | ||
404 | pgd_t *pgd_base = swapper_pg_dir; | ||
405 | |||
406 | paravirt_pagetable_setup_start(pgd_base); | ||
353 | 407 | ||
354 | /* Enable PSE if available */ | 408 | /* Enable PSE if available */ |
355 | if (cpu_has_pse) { | 409 | if (cpu_has_pse) |
356 | set_in_cr4(X86_CR4_PSE); | 410 | set_in_cr4(X86_CR4_PSE); |
357 | } | ||
358 | 411 | ||
359 | /* Enable PGE if available */ | 412 | /* Enable PGE if available */ |
360 | if (cpu_has_pge) { | 413 | if (cpu_has_pge) { |
@@ -371,20 +424,12 @@ static void __init pagetable_init (void) | |||
371 | * created - mappings will be set by set_fixmap(): | 424 | * created - mappings will be set by set_fixmap(): |
372 | */ | 425 | */ |
373 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | 426 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; |
374 | page_table_range_init(vaddr, 0, pgd_base); | 427 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; |
428 | page_table_range_init(vaddr, end, pgd_base); | ||
375 | 429 | ||
376 | permanent_kmaps_init(pgd_base); | 430 | permanent_kmaps_init(pgd_base); |
377 | 431 | ||
378 | #ifdef CONFIG_X86_PAE | 432 | paravirt_pagetable_setup_done(pgd_base); |
379 | /* | ||
380 | * Add low memory identity-mappings - SMP needs it when | ||
381 | * starting up on an AP from real-mode. In the non-PAE | ||
382 | * case we already have these mappings through head.S. | ||
383 | * All user-space mappings are explicitly cleared after | ||
384 | * SMP startup. | ||
385 | */ | ||
386 | set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); | ||
387 | #endif | ||
388 | } | 433 | } |
389 | 434 | ||
390 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) | 435 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) |
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache; | |||
700 | 745 | ||
701 | void __init pgtable_cache_init(void) | 746 | void __init pgtable_cache_init(void) |
702 | { | 747 | { |
748 | size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); | ||
749 | |||
703 | if (PTRS_PER_PMD > 1) { | 750 | if (PTRS_PER_PMD > 1) { |
704 | pmd_cache = kmem_cache_create("pmd", | 751 | pmd_cache = kmem_cache_create("pmd", |
705 | PTRS_PER_PMD*sizeof(pmd_t), | 752 | PTRS_PER_PMD*sizeof(pmd_t), |
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void) | |||
709 | NULL); | 756 | NULL); |
710 | if (!pmd_cache) | 757 | if (!pmd_cache) |
711 | panic("pgtable_cache_init(): cannot create pmd cache"); | 758 | panic("pgtable_cache_init(): cannot create pmd cache"); |
759 | |||
760 | if (!SHARED_KERNEL_PMD) { | ||
761 | /* If we're in PAE mode and have a non-shared | ||
762 | kernel pmd, then the pgd size must be a | ||
763 | page size. This is because the pgd_list | ||
764 | links through the page structure, so there | ||
765 | can only be one pgd per page for this to | ||
766 | work. */ | ||
767 | pgd_size = PAGE_SIZE; | ||
768 | } | ||
712 | } | 769 | } |
713 | pgd_cache = kmem_cache_create("pgd", | 770 | pgd_cache = kmem_cache_create("pgd", |
714 | PTRS_PER_PGD*sizeof(pgd_t), | 771 | pgd_size, |
715 | PTRS_PER_PGD*sizeof(pgd_t), | 772 | pgd_size, |
716 | 0, | 773 | 0, |
717 | pgd_ctor, | 774 | pgd_ctor, |
718 | PTRS_PER_PMD == 1 ? pgd_dtor : NULL); | 775 | (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL); |
719 | if (!pgd_cache) | 776 | if (!pgd_cache) |
720 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 777 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
721 | } | 778 | } |
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void) | |||
751 | 808 | ||
752 | void mark_rodata_ro(void) | 809 | void mark_rodata_ro(void) |
753 | { | 810 | { |
754 | unsigned long addr = (unsigned long)__start_rodata; | 811 | unsigned long start = PFN_ALIGN(_text); |
812 | unsigned long size = PFN_ALIGN(_etext) - start; | ||
755 | 813 | ||
756 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 814 | #ifdef CONFIG_HOTPLUG_CPU |
757 | change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); | 815 | /* It must still be possible to apply SMP alternatives. */ |
816 | if (num_possible_cpus() <= 1) | ||
817 | #endif | ||
818 | { | ||
819 | change_page_attr(virt_to_page(start), | ||
820 | size >> PAGE_SHIFT, PAGE_KERNEL_RX); | ||
821 | printk("Write protecting the kernel text: %luk\n", size >> 10); | ||
822 | } | ||
758 | 823 | ||
759 | printk("Write protecting the kernel read-only data: %uk\n", | 824 | start += size; |
760 | (__end_rodata - __start_rodata) >> 10); | 825 | size = (unsigned long)__end_rodata - start; |
826 | change_page_attr(virt_to_page(start), | ||
827 | size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
828 | printk("Write protecting the kernel read-only data: %luk\n", | ||
829 | size >> 10); | ||
761 | 830 | ||
762 | /* | 831 | /* |
763 | * change_page_attr() requires a global_flush_tlb() call after it. | 832 | * change_page_attr() requires a global_flush_tlb() call after it. |
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
774 | unsigned long addr; | 843 | unsigned long addr; |
775 | 844 | ||
776 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 845 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
777 | ClearPageReserved(virt_to_page(addr)); | 846 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
778 | init_page_count(virt_to_page(addr)); | 847 | ClearPageReserved(page); |
779 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); | 848 | init_page_count(page); |
780 | free_page(addr); | 849 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
850 | __free_page(page); | ||
781 | totalram_pages++; | 851 | totalram_pages++; |
782 | } | 852 | } |
783 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 853 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
784 | } | 854 | } |
785 | 855 | ||
786 | void free_initmem(void) | 856 | void free_initmem(void) |
787 | { | 857 | { |
788 | free_init_pages("unused kernel memory", | 858 | free_init_pages("unused kernel memory", |
789 | (unsigned long)(&__init_begin), | 859 | __pa_symbol(&__init_begin), |
790 | (unsigned long)(&__init_end)); | 860 | __pa_symbol(&__init_end)); |
791 | } | 861 | } |
792 | 862 | ||
793 | #ifdef CONFIG_BLK_DEV_INITRD | 863 | #ifdef CONFIG_BLK_DEV_INITRD |
794 | void free_initrd_mem(unsigned long start, unsigned long end) | 864 | void free_initrd_mem(unsigned long start, unsigned long end) |
795 | { | 865 | { |
796 | free_init_pages("initrd memory", start, end); | 866 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
797 | } | 867 | } |
798 | #endif | 868 | #endif |
799 | 869 | ||