Diffstat (limited to 'arch/i386/mm/init.c')
-rw-r--r--  arch/i386/mm/init.c  196
1 file changed, 133 insertions(+), 63 deletions(-)
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index ae436882af7a..dbe16f63a566 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
@@ -42,6 +43,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/paravirt.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
         pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
-        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-        paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
-        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-        pud = pud_offset(pgd, 0);
-        if (pmd_table != pmd_offset(pud, 0))
-                BUG();
-#else
+        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
+                pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+                paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+                pud = pud_offset(pgd, 0);
+                if (pmd_table != pmd_offset(pud, 0))
+                        BUG();
+        }
+#endif
         pud = pud_offset(pgd, 0);
         pmd_table = pmd_offset(pud, 0);
-#endif
-
         return pmd_table;
 }
 
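The PAE branch of one_md_table_init() now checks _PAGE_PRESENT before allocating, so the helper becomes idempotent and safe to call on a pgd entry that a hypervisor or an earlier pass has already populated. A minimal user-space sketch of the same allocate-only-if-absent pattern, with made-up stand-in types rather than the real kernel ones:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PAGE_PRESENT 0x1UL              /* stand-in for _PAGE_PRESENT */

/* Toy "pgd entry": low bit = present, remaining bits = table pointer. */
typedef struct { uintptr_t val; } pgd_entry;

/* Allocate a lower-level table only if the entry is not already present. */
static void *one_table_init(pgd_entry *pgd)
{
        if (!(pgd->val & PAGE_PRESENT)) {
                void *table = calloc(1, 4096);  /* alloc_bootmem_low_pages() analogue */
                pgd->val = (uintptr_t)table | PAGE_PRESENT;
        }
        return (void *)(pgd->val & ~PAGE_PRESENT);
}

int main(void)
{
        pgd_entry e = { 0 };
        void *a = one_table_init(&e);   /* first call allocates */
        void *b = one_table_init(&e);   /* second call reuses the same table */
        printf("same table: %d\n", a == b);
        free(a);
        return 0;
}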
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
  */
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
-        if (pmd_none(*pmd)) {
+        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
                 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
                 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-                if (page_table != pte_offset_kernel(pmd, 0))
-                        BUG();
-
-                return page_table;
+                BUG_ON(page_table != pte_offset_kernel(pmd, 0));
         }
 
         return pte_offset_kernel(pmd, 0);
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
         pgd_t *pgd;
-        pud_t *pud;
         pmd_t *pmd;
         int pgd_idx, pmd_idx;
         unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
         pgd = pgd_base + pgd_idx;
 
         for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-                if (pgd_none(*pgd))
-                        one_md_table_init(pgd);
-                pud = pud_offset(pgd, vaddr);
-                pmd = pmd_offset(pud, vaddr);
+                pmd = one_md_table_init(pgd);
+                pmd = pmd + pmd_index(vaddr);
                 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-                        if (pmd_none(*pmd))
-                                one_page_table_init(pmd);
+                        one_page_table_init(pmd);
 
                         vaddr += PMD_SIZE;
                 }
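page_table_range_init() now locates its starting pmd with pmd_index(vaddr) instead of walking pud_offset()/pmd_offset(), and relies on the two helpers above to skip entries that already exist. pmd_index() is plain shift-and-mask arithmetic; a small sketch assuming the i386 PAE constants (PMD_SHIFT = 21, PTRS_PER_PMD = 512):

#include <stdio.h>

#define PMD_SHIFT    21                 /* i386 PAE: each pmd entry maps 2MB */
#define PTRS_PER_PMD 512

/* Index of the pmd entry that covers a virtual address. */
static unsigned long pmd_index(unsigned long vaddr)
{
        return (vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

int main(void)
{
        /* 0xC0000000 is the conventional PAGE_OFFSET; its low 30 bits are
         * zero, so it lands in pmd slot 0 of the kernel's pgd entry. */
        printf("%lu\n", pmd_index(0xC0000000UL));   /* prints 0 */
        printf("%lu\n", pmd_index(0xC0200000UL));   /* prints 1 */
        return 0;
}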
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                         /* Map with big pages if possible, otherwise create normal page tables. */
                         if (cpu_has_pse) {
                                 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
                                 if (is_kernel_text(address) || is_kernel_text(address2))
                                         set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                 else
                                         set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+
                                 pfn += PTRS_PER_PTE;
                         } else {
                                 pte = one_page_table_init(pmd);
 
-                                for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
-                                        if (is_kernel_text(address))
-                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-                                        else
-                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                                for (pte_ofs = 0;
+                                     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+                                     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+                                        if (is_kernel_text(address))
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+                                        else
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                 }
                         }
                 }
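Besides reflowing the for statement, this hunk adds address += PAGE_SIZE to the loop header; previously address was never advanced inside the pte loop, so is_kernel_text() saw the same address for every pte in the pmd and all of them received the protection chosen for the first page. A toy illustration of the fixed loop, with an invented is_kernel_text() range standing in for the real section bounds:

#include <stdio.h>

#define PAGE_SIZE    4096UL
#define PTRS_PER_PTE 1024

/* Pretend the kernel text spans the first 16 pages of this pmd. */
static int is_kernel_text(unsigned long addr)
{
        return addr < 16 * PAGE_SIZE;
}

int main(void)
{
        unsigned long address = 0;
        int pte_ofs, exec_pages = 0;

        /* Fixed loop: address advances with the pte, as in the patch. */
        for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
             pte_ofs++, address += PAGE_SIZE) {
                if (is_kernel_text(address))
                        exec_pages++;
        }
        printf("executable pages: %d\n", exec_pages);   /* 16, not 1024 */
        return 0;
}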
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
 #define remap_numa_kva() do {} while (0)
 #endif
 
-static void __init pagetable_init (void)
+void __init native_pagetable_setup_start(pgd_t *base)
 {
-        unsigned long vaddr;
-        pgd_t *pgd_base = swapper_pg_dir;
-
 #ifdef CONFIG_X86_PAE
         int i;
-        /* Init entries of the first-level page table to the zero page */
-        for (i = 0; i < PTRS_PER_PGD; i++)
-                set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+        /*
+         * Init entries of the first-level page table to the
+         * zero page, if they haven't already been set up.
+         *
+         * In a normal native boot, we'll be running on a
+         * pagetable rooted in swapper_pg_dir, but not in PAE
+         * mode, so this will end up clobbering the mappings
+         * for the lower 24Mbytes of the address space,
+         * without affecting the kernel address space.
+         */
+        for (i = 0; i < USER_PTRS_PER_PGD; i++)
+                set_pgd(&base[i],
+                        __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+        /* Make sure kernel address space is empty so that a pagetable
+           will be allocated for it. */
+        memset(&base[USER_PTRS_PER_PGD], 0,
+               KERNEL_PGD_PTRS * sizeof(pgd_t));
 #else
         paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
 #endif
+}
+
+void __init native_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+        /*
+         * Add low memory identity-mappings - SMP needs it when
+         * starting up on an AP from real-mode. In the non-PAE
+         * case we already have these mappings through head.S.
+         * All user-space mappings are explicitly cleared after
+         * SMP startup.
+         */
+        set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
+
+/*
+ * Build a proper pagetable for the kernel mappings. Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE). The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir. In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
+static void __init pagetable_init (void)
+{
+        unsigned long vaddr, end;
+        pgd_t *pgd_base = swapper_pg_dir;
+
+        paravirt_pagetable_setup_start(pgd_base);
 
         /* Enable PSE if available */
-        if (cpu_has_pse) {
+        if (cpu_has_pse)
                 set_in_cr4(X86_CR4_PSE);
-        }
 
         /* Enable PGE if available */
         if (cpu_has_pge) {
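The set_pgd(&base[0], base[USER_PTRS_PER_PGD]) in native_pagetable_setup_done() works because each PAE pgd entry covers 1GB: with the conventional PAGE_OFFSET of 0xC0000000 the kernel's lowmem mapping lives in entry 3, and copying that entry into slot 0 is what produces the low-memory identity mappings the comment describes. The index arithmetic, as a sketch (the PAGE_OFFSET value is an assumption for illustration):

#include <stdio.h>

#define PGDIR_SHIFT 30                  /* i386 PAE: each pgd entry maps 1GB */
#define PAGE_OFFSET 0xC0000000UL        /* conventional 3GB/1GB split */

int main(void)
{
        /* USER_PTRS_PER_PGD is the index of the first kernel pgd entry. */
        unsigned long user_ptrs_per_pgd = PAGE_OFFSET >> PGDIR_SHIFT;

        printf("USER_PTRS_PER_PGD = %lu\n", user_ptrs_per_pgd);   /* 3 */
        /* Copying base[3] into base[0] aliases virtual 0-1GB to the same
         * physical memory the kernel maps at 3-4GB, which is the identity
         * mapping an AP needs while it is still running in real mode. */
        return 0;
}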
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
          * created - mappings will be set by set_fixmap():
          */
         vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-        page_table_range_init(vaddr, 0, pgd_base);
+        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+        page_table_range_init(vaddr, end, pgd_base);
 
         permanent_kmaps_init(pgd_base);
 
-#ifdef CONFIG_X86_PAE
-        /*
-         * Add low memory identity-mappings - SMP needs it when
-         * starting up on an AP from real-mode. In the non-PAE
-         * case we already have these mappings through head.S.
-         * All user-space mappings are explicitly cleared after
-         * SMP startup.
-         */
-        set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
+        paravirt_pagetable_setup_done(pgd_base);
 }
 
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
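page_table_range_init() now gets an explicit end address, FIXADDR_TOP rounded up to the next pmd boundary, instead of the old 0 (the loop compares vaddr != end, so the old call relied on vaddr wrapping around). The rounding is the usual add-then-mask idiom; a short sketch using the non-PAE PMD_SIZE of 4MB and made-up input addresses:

#include <stdio.h>
#include <stdint.h>

#define PMD_SIZE ((uint32_t)1 << 22)    /* 4MB, i386 without PAE */
#define PMD_MASK (~(PMD_SIZE - 1))

/* Round a 32-bit virtual address up to the next pmd boundary. */
static uint32_t pmd_round_up(uint32_t addr)
{
        return (addr + PMD_SIZE - 1) & PMD_MASK;
}

int main(void)
{
        printf("0x%08x\n", pmd_round_up(0xE0001000u));  /* 0xe0400000 */
        printf("0x%08x\n", pmd_round_up(0xE0400000u));  /* already aligned */
        return 0;
}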
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+        size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
         if (PTRS_PER_PMD > 1) {
                 pmd_cache = kmem_cache_create("pmd",
                                 PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
                                 NULL);
                 if (!pmd_cache)
                         panic("pgtable_cache_init(): cannot create pmd cache");
+
+                if (!SHARED_KERNEL_PMD) {
+                        /* If we're in PAE mode and have a non-shared
+                           kernel pmd, then the pgd size must be a
+                           page size. This is because the pgd_list
+                           links through the page structure, so there
+                           can only be one pgd per page for this to
+                           work. */
+                        pgd_size = PAGE_SIZE;
+                }
         }
         pgd_cache = kmem_cache_create("pgd",
-                                PTRS_PER_PGD*sizeof(pgd_t),
-                                PTRS_PER_PGD*sizeof(pgd_t),
+                                pgd_size,
+                                pgd_size,
                                 0,
                                 pgd_ctor,
-                                PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+                                (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
         if (!pgd_cache)
                 panic("pgtable_cache_init(): Cannot create pgd cache");
 }
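The pgd_size logic only changes anything in PAE mode: there the top level has just four 8-byte entries, so a pgd object is 32 bytes, but with a non-shared kernel pmd each pgd must occupy a whole page so that pgd_list can link pgds through their struct page, as the new comment explains. A quick sketch of the two object sizes, using the standard i386 PAE constants:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE    4096
#define PTRS_PER_PGD 4          /* i386 PAE: four top-level entries */
#define PGD_ENTRY_SZ 8          /* PAE entries are 64 bits wide */

int main(void)
{
        size_t shared   = PTRS_PER_PGD * PGD_ENTRY_SZ; /* SHARED_KERNEL_PMD case */
        size_t unshared = PAGE_SIZE;                    /* !SHARED_KERNEL_PMD case */

        printf("pgd object: %zu bytes shared, %zu bytes unshared\n",
               shared, unshared);                       /* 32 vs 4096 */
        return 0;
}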
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)
 
 void mark_rodata_ro(void)
 {
-        unsigned long addr = (unsigned long)__start_rodata;
+        unsigned long start = PFN_ALIGN(_text);
+        unsigned long size = PFN_ALIGN(_etext) - start;
 
-        for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-                change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+        /* It must still be possible to apply SMP alternatives. */
+        if (num_possible_cpus() <= 1)
+#endif
+        {
+                change_page_attr(virt_to_page(start),
+                                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+                printk("Write protecting the kernel text: %luk\n", size >> 10);
+        }
 
-        printk("Write protecting the kernel read-only data: %uk\n",
-                        (__end_rodata - __start_rodata) >> 10);
+        start += size;
+        size = (unsigned long)__end_rodata - start;
+        change_page_attr(virt_to_page(start),
+                         size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+        printk("Write protecting the kernel read-only data: %luk\n",
+               size >> 10);
 
         /*
          * change_page_attr() requires a global_flush_tlb() call after it.
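mark_rodata_ro() now derives its ranges from PFN_ALIGN(_text) and PFN_ALIGN(_etext), which is why linux/pfn.h is added at the top of the file. PFN_ALIGN() rounds an address up to the next page boundary; a sketch of that arithmetic and of the kilobyte figure the printk reports, with invented section addresses standing in for the linker-provided symbols:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
        /* Illustrative addresses only; the real ones come from the linker. */
        unsigned long text  = 0xC0100000UL;     /* stand-in for _text  */
        unsigned long etext = 0xC02F1234UL;     /* stand-in for _etext */

        unsigned long start = PFN_ALIGN(text);
        unsigned long size  = PFN_ALIGN(etext) - start;

        printf("pages: %lu, %luk\n", size >> PAGE_SHIFT, size >> 10);
        return 0;
}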
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
         unsigned long addr;
 
         for (addr = begin; addr < end; addr += PAGE_SIZE) {
-                ClearPageReserved(virt_to_page(addr));
-                init_page_count(virt_to_page(addr));
-                memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-                free_page(addr);
+                struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+                ClearPageReserved(page);
+                init_page_count(page);
+                memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+                __free_page(page);
                 totalram_pages++;
         }
-        printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 }
 
 void free_initmem(void)
 {
         free_init_pages("unused kernel memory",
-                        (unsigned long)(&__init_begin),
-                        (unsigned long)(&__init_end));
+                        __pa_symbol(&__init_begin),
+                        __pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-        free_init_pages("initrd memory", start, end);
+        free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
 
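free_init_pages() now expects physical addresses (its callers pass __pa_symbol()/__pa()) and turns each one into a struct page with pfn_to_page(addr >> PAGE_SHIFT). The address arithmetic behind that, as a sketch with an assumed PAGE_OFFSET for the virtual-to-physical step:

#include <stdio.h>

#define PAGE_SHIFT  12
#define PAGE_OFFSET 0xC0000000UL        /* conventional i386 lowmem base */

/* __pa() analogue: lowmem virtual address -> physical address. */
static unsigned long virt_to_phys_addr(unsigned long vaddr)
{
        return vaddr - PAGE_OFFSET;
}

int main(void)
{
        unsigned long init_begin = 0xC0400000UL; /* stand-in for &__init_begin */
        unsigned long phys = virt_to_phys_addr(init_begin);

        printf("phys 0x%08lx -> pfn %lu\n", phys, phys >> PAGE_SHIFT);
        return 0;
}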