Diffstat (limited to 'arch/x86/mm/init_32.c')
| -rw-r--r-- | arch/x86/mm/init_32.c | 461 |
1 file changed, 157 insertions, 304 deletions
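The hunks below change kernel_physical_mapping_init() from a static helper taking a pgd pointer and a PFN range into a shared entry point taking a physical address range plus a page-size mask, so the 32-bit and 64-bit init paths can be unified. As a rough orientation, here is a minimal sketch of how a caller might drive the new signature; the function example_map_range and its placement are illustrative assumptions, not part of this patch, while cpu_has_pse and PG_LEVEL_2M are the same x86 helpers the patch itself relies on.

/* Hypothetical caller, for illustration only (not from this patch). */
static void __init example_map_range(unsigned long start, unsigned long end)
{
	unsigned long page_size_mask = 0;

	/* Request 2MB mappings when the CPU supports PSE. */
	if (cpu_has_pse)
		page_size_mask |= 1 << PG_LEVEL_2M;

	/* Map the physical range [start, end) into the kernel page tables. */
	kernel_physical_mapping_init(start, end, page_size_mask);
}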
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2cef05074413..db81e9a8556b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
| @@ -49,9 +49,7 @@ | |||
| 49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
| 50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
| 51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
| 52 | #include <asm/smp.h> | 52 | #include <asm/init.h> |
| 53 | |||
| 54 | unsigned int __VMALLOC_RESERVE = 128 << 20; | ||
| 55 | 53 | ||
| 56 | unsigned long max_low_pfn_mapped; | 54 | unsigned long max_low_pfn_mapped; |
| 57 | unsigned long max_pfn_mapped; | 55 | unsigned long max_pfn_mapped; |
| @@ -61,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn; | |||
| 61 | 59 | ||
| 62 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
| 63 | 61 | ||
| 64 | 62 | bool __read_mostly __vmalloc_start_set = false; | |
| 65 | static unsigned long __initdata table_start; | ||
| 66 | static unsigned long __meminitdata table_end; | ||
| 67 | static unsigned long __meminitdata table_top; | ||
| 68 | |||
| 69 | static int __initdata after_init_bootmem; | ||
| 70 | 63 | ||
| 71 | static __init void *alloc_low_page(void) | 64 | static __init void *alloc_low_page(void) |
| 72 | { | 65 | { |
| 73 | unsigned long pfn = table_end++; | 66 | unsigned long pfn = e820_table_end++; |
| 74 | void *adr; | 67 | void *adr; |
| 75 | 68 | ||
| 76 | if (pfn >= table_top) | 69 | if (pfn >= e820_table_top) |
| 77 | panic("alloc_low_page: ran out of memory"); | 70 | panic("alloc_low_page: ran out of memory"); |
| 78 | 71 | ||
| 79 | adr = __va(pfn * PAGE_SIZE); | 72 | adr = __va(pfn * PAGE_SIZE); |
| @@ -93,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
| 93 | 86 | ||
| 94 | #ifdef CONFIG_X86_PAE | 87 | #ifdef CONFIG_X86_PAE |
| 95 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 88 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
| 96 | if (after_init_bootmem) | 89 | if (after_bootmem) |
| 97 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 90 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
| 98 | else | 91 | else |
| 99 | pmd_table = (pmd_t *)alloc_low_page(); | 92 | pmd_table = (pmd_t *)alloc_low_page(); |
| @@ -120,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
| 120 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 113 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
| 121 | pte_t *page_table = NULL; | 114 | pte_t *page_table = NULL; |
| 122 | 115 | ||
| 123 | if (after_init_bootmem) { | 116 | if (after_bootmem) { |
| 124 | #ifdef CONFIG_DEBUG_PAGEALLOC | 117 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 125 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 118 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
| 126 | #endif | 119 | #endif |
| @@ -138,6 +131,23 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
| 138 | return pte_offset_kernel(pmd, 0); | 131 | return pte_offset_kernel(pmd, 0); |
| 139 | } | 132 | } |
| 140 | 133 | ||
| 134 | pmd_t * __init populate_extra_pmd(unsigned long vaddr) | ||
| 135 | { | ||
| 136 | int pgd_idx = pgd_index(vaddr); | ||
| 137 | int pmd_idx = pmd_index(vaddr); | ||
| 138 | |||
| 139 | return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; | ||
| 140 | } | ||
| 141 | |||
| 142 | pte_t * __init populate_extra_pte(unsigned long vaddr) | ||
| 143 | { | ||
| 144 | int pte_idx = pte_index(vaddr); | ||
| 145 | pmd_t *pmd; | ||
| 146 | |||
| 147 | pmd = populate_extra_pmd(vaddr); | ||
| 148 | return one_page_table_init(pmd) + pte_idx; | ||
| 149 | } | ||
| 150 | |||
| 141 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | 151 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, |
| 142 | unsigned long vaddr, pte_t *lastpte) | 152 | unsigned long vaddr, pte_t *lastpte) |
| 143 | { | 153 | { |
| @@ -154,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |||
| 154 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 164 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
| 155 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 165 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
| 156 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 166 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end |
| 157 | && ((__pa(pte) >> PAGE_SHIFT) < table_start | 167 | && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start |
| 158 | || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { | 168 | || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { |
| 159 | pte_t *newpte; | 169 | pte_t *newpte; |
| 160 | int i; | 170 | int i; |
| 161 | 171 | ||
| 162 | BUG_ON(after_init_bootmem); | 172 | BUG_ON(after_bootmem); |
| 163 | newpte = alloc_low_page(); | 173 | newpte = alloc_low_page(); |
| 164 | for (i = 0; i < PTRS_PER_PTE; i++) | 174 | for (i = 0; i < PTRS_PER_PTE; i++) |
| 165 | set_pte(newpte + i, pte[i]); | 175 | set_pte(newpte + i, pte[i]); |
| @@ -228,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr) | |||
| 228 | * of max_low_pfn pages, by creating page tables starting from address | 238 | * of max_low_pfn pages, by creating page tables starting from address |
| 229 | * PAGE_OFFSET: | 239 | * PAGE_OFFSET: |
| 230 | */ | 240 | */ |
| 231 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | 241 | unsigned long __init |
| 232 | unsigned long start_pfn, | 242 | kernel_physical_mapping_init(unsigned long start, |
| 233 | unsigned long end_pfn, | 243 | unsigned long end, |
| 234 | int use_pse) | 244 | unsigned long page_size_mask) |
| 235 | { | 245 | { |
| 246 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); | ||
| 247 | unsigned long start_pfn, end_pfn; | ||
| 248 | pgd_t *pgd_base = swapper_pg_dir; | ||
| 236 | int pgd_idx, pmd_idx, pte_ofs; | 249 | int pgd_idx, pmd_idx, pte_ofs; |
| 237 | unsigned long pfn; | 250 | unsigned long pfn; |
| 238 | pgd_t *pgd; | 251 | pgd_t *pgd; |
| @@ -241,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
| 241 | unsigned pages_2m, pages_4k; | 254 | unsigned pages_2m, pages_4k; |
| 242 | int mapping_iter; | 255 | int mapping_iter; |
| 243 | 256 | ||
| 257 | start_pfn = start >> PAGE_SHIFT; | ||
| 258 | end_pfn = end >> PAGE_SHIFT; | ||
| 259 | |||
| 244 | /* | 260 | /* |
| 245 | * First iteration will setup identity mapping using large/small pages | 261 | * First iteration will setup identity mapping using large/small pages |
| 246 | * based on use_pse, with other attributes same as set by | 262 | * based on use_pse, with other attributes same as set by |
| @@ -355,26 +371,6 @@ repeat: | |||
| 355 | mapping_iter = 2; | 371 | mapping_iter = 2; |
| 356 | goto repeat; | 372 | goto repeat; |
| 357 | } | 373 | } |
| 358 | } | ||
| 359 | |||
| 360 | /* | ||
| 361 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
| 362 | * is valid. The argument is a physical page number. | ||
| 363 | * | ||
| 364 | * | ||
| 365 | * On x86, access has to be given to the first megabyte of ram because that area | ||
| 366 | * contains bios code and data regions used by X and dosemu and similar apps. | ||
| 367 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | ||
| 368 | * mmio resources as well as potential bios/acpi data regions. | ||
| 369 | */ | ||
| 370 | int devmem_is_allowed(unsigned long pagenr) | ||
| 371 | { | ||
| 372 | if (pagenr <= 256) | ||
| 373 | return 1; | ||
| 374 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
| 375 | return 0; | ||
| 376 | if (!page_is_ram(pagenr)) | ||
| 377 | return 1; | ||
| 378 | return 0; | 374 | return 0; |
| 379 | } | 375 | } |
| 380 | 376 | ||
| @@ -470,22 +466,10 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, | |||
| 470 | work_with_active_regions(nid, add_highpages_work_fn, &data); | 466 | work_with_active_regions(nid, add_highpages_work_fn, &data); |
| 471 | } | 467 | } |
| 472 | 468 | ||
| 473 | #ifndef CONFIG_NUMA | ||
| 474 | static void __init set_highmem_pages_init(void) | ||
| 475 | { | ||
| 476 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | ||
| 477 | |||
| 478 | totalram_pages += totalhigh_pages; | ||
| 479 | } | ||
| 480 | #endif /* !CONFIG_NUMA */ | ||
| 481 | |||
| 482 | #else | 469 | #else |
| 483 | static inline void permanent_kmaps_init(pgd_t *pgd_base) | 470 | static inline void permanent_kmaps_init(pgd_t *pgd_base) |
| 484 | { | 471 | { |
| 485 | } | 472 | } |
| 486 | static inline void set_highmem_pages_init(void) | ||
| 487 | { | ||
| 488 | } | ||
| 489 | #endif /* CONFIG_HIGHMEM */ | 473 | #endif /* CONFIG_HIGHMEM */ |
| 490 | 474 | ||
| 491 | void __init native_pagetable_setup_start(pgd_t *base) | 475 | void __init native_pagetable_setup_start(pgd_t *base) |
| @@ -543,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base) | |||
| 543 | * be partially populated, and so it avoids stomping on any existing | 527 | * be partially populated, and so it avoids stomping on any existing |
| 544 | * mappings. | 528 | * mappings. |
| 545 | */ | 529 | */ |
| 546 | static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) | 530 | void __init early_ioremap_page_table_range_init(void) |
| 547 | { | 531 | { |
| 532 | pgd_t *pgd_base = swapper_pg_dir; | ||
| 548 | unsigned long vaddr, end; | 533 | unsigned long vaddr, end; |
| 549 | 534 | ||
| 550 | /* | 535 | /* |
| @@ -639,7 +624,7 @@ static int __init noexec_setup(char *str) | |||
| 639 | } | 624 | } |
| 640 | early_param("noexec", noexec_setup); | 625 | early_param("noexec", noexec_setup); |
| 641 | 626 | ||
| 642 | static void __init set_nx(void) | 627 | void __init set_nx(void) |
| 643 | { | 628 | { |
| 644 | unsigned int v[4], l, h; | 629 | unsigned int v[4], l, h; |
| 645 | 630 | ||
| @@ -675,75 +660,97 @@ static int __init parse_highmem(char *arg) | |||
| 675 | } | 660 | } |
| 676 | early_param("highmem", parse_highmem); | 661 | early_param("highmem", parse_highmem); |
| 677 | 662 | ||
| 663 | #define MSG_HIGHMEM_TOO_BIG \ | ||
| 664 | "highmem size (%luMB) is bigger than pages available (%luMB)!\n" | ||
| 665 | |||
| 666 | #define MSG_LOWMEM_TOO_SMALL \ | ||
| 667 | "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" | ||
| 678 | /* | 668 | /* |
| 679 | * Determine low and high memory ranges: | 669 | * All of RAM fits into lowmem - but if user wants highmem |
| 670 | * artificially via the highmem=x boot parameter then create | ||
| 671 | * it: | ||
| 680 | */ | 672 | */ |
| 681 | void __init find_low_pfn_range(void) | 673 | void __init lowmem_pfn_init(void) |
| 682 | { | 674 | { |
| 683 | /* it could update max_pfn */ | ||
| 684 | |||
| 685 | /* max_low_pfn is 0, we already have early_res support */ | 675 | /* max_low_pfn is 0, we already have early_res support */ |
| 686 | |||
| 687 | max_low_pfn = max_pfn; | 676 | max_low_pfn = max_pfn; |
| 688 | if (max_low_pfn > MAXMEM_PFN) { | 677 | |
| 689 | if (highmem_pages == -1) | 678 | if (highmem_pages == -1) |
| 690 | highmem_pages = max_pfn - MAXMEM_PFN; | 679 | highmem_pages = 0; |
| 691 | if (highmem_pages + MAXMEM_PFN < max_pfn) | 680 | #ifdef CONFIG_HIGHMEM |
| 692 | max_pfn = MAXMEM_PFN + highmem_pages; | 681 | if (highmem_pages >= max_pfn) { |
| 693 | if (highmem_pages + MAXMEM_PFN > max_pfn) { | 682 | printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, |
| 694 | printk(KERN_WARNING "only %luMB highmem pages " | 683 | pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); |
| 695 | "available, ignoring highmem size of %uMB.\n", | 684 | highmem_pages = 0; |
| 696 | pages_to_mb(max_pfn - MAXMEM_PFN), | 685 | } |
| 686 | if (highmem_pages) { | ||
| 687 | if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { | ||
| 688 | printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, | ||
| 697 | pages_to_mb(highmem_pages)); | 689 | pages_to_mb(highmem_pages)); |
| 698 | highmem_pages = 0; | 690 | highmem_pages = 0; |
| 699 | } | 691 | } |
| 700 | max_low_pfn = MAXMEM_PFN; | 692 | max_low_pfn -= highmem_pages; |
| 693 | } | ||
| 694 | #else | ||
| 695 | if (highmem_pages) | ||
| 696 | printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); | ||
| 697 | #endif | ||
| 698 | } | ||
| 699 | |||
| 700 | #define MSG_HIGHMEM_TOO_SMALL \ | ||
| 701 | "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" | ||
| 702 | |||
| 703 | #define MSG_HIGHMEM_TRIMMED \ | ||
| 704 | "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n" | ||
| 705 | /* | ||
| 706 | * We have more RAM than fits into lowmem - we try to put it into | ||
| 707 | * highmem, also taking the highmem=x boot parameter into account: | ||
| 708 | */ | ||
| 709 | void __init highmem_pfn_init(void) | ||
| 710 | { | ||
| 711 | max_low_pfn = MAXMEM_PFN; | ||
| 712 | |||
| 713 | if (highmem_pages == -1) | ||
| 714 | highmem_pages = max_pfn - MAXMEM_PFN; | ||
| 715 | |||
| 716 | if (highmem_pages + MAXMEM_PFN < max_pfn) | ||
| 717 | max_pfn = MAXMEM_PFN + highmem_pages; | ||
| 718 | |||
| 719 | if (highmem_pages + MAXMEM_PFN > max_pfn) { | ||
| 720 | printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, | ||
| 721 | pages_to_mb(max_pfn - MAXMEM_PFN), | ||
| 722 | pages_to_mb(highmem_pages)); | ||
| 723 | highmem_pages = 0; | ||
| 724 | } | ||
| 701 | #ifndef CONFIG_HIGHMEM | 725 | #ifndef CONFIG_HIGHMEM |
| 702 | /* Maximum memory usable is what is directly addressable */ | 726 | /* Maximum memory usable is what is directly addressable */ |
| 703 | printk(KERN_WARNING "Warning only %ldMB will be used.\n", | 727 | printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); |
| 704 | MAXMEM>>20); | 728 | if (max_pfn > MAX_NONPAE_PFN) |
| 705 | if (max_pfn > MAX_NONPAE_PFN) | 729 | printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); |
| 706 | printk(KERN_WARNING | 730 | else |
| 707 | "Use a HIGHMEM64G enabled kernel.\n"); | 731 | printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); |
| 708 | else | 732 | max_pfn = MAXMEM_PFN; |
| 709 | printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); | ||
| 710 | max_pfn = MAXMEM_PFN; | ||
| 711 | #else /* !CONFIG_HIGHMEM */ | 733 | #else /* !CONFIG_HIGHMEM */ |
| 712 | #ifndef CONFIG_HIGHMEM64G | 734 | #ifndef CONFIG_HIGHMEM64G |
| 713 | if (max_pfn > MAX_NONPAE_PFN) { | 735 | if (max_pfn > MAX_NONPAE_PFN) { |
| 714 | max_pfn = MAX_NONPAE_PFN; | 736 | max_pfn = MAX_NONPAE_PFN; |
| 715 | printk(KERN_WARNING "Warning only 4GB will be used." | 737 | printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); |
| 716 | "Use a HIGHMEM64G enabled kernel.\n"); | 738 | } |
| 717 | } | ||
| 718 | #endif /* !CONFIG_HIGHMEM64G */ | 739 | #endif /* !CONFIG_HIGHMEM64G */ |
| 719 | #endif /* !CONFIG_HIGHMEM */ | 740 | #endif /* !CONFIG_HIGHMEM */ |
| 720 | } else { | 741 | } |
| 721 | if (highmem_pages == -1) | 742 | |
| 722 | highmem_pages = 0; | 743 | /* |
| 723 | #ifdef CONFIG_HIGHMEM | 744 | * Determine low and high memory ranges: |
| 724 | if (highmem_pages >= max_pfn) { | 745 | */ |
| 725 | printk(KERN_ERR "highmem size specified (%uMB) is " | 746 | void __init find_low_pfn_range(void) |
| 726 | "bigger than pages available (%luMB)!.\n", | 747 | { |
| 727 | pages_to_mb(highmem_pages), | 748 | /* it could update max_pfn */ |
| 728 | pages_to_mb(max_pfn)); | 749 | |
| 729 | highmem_pages = 0; | 750 | if (max_pfn <= MAXMEM_PFN) |
| 730 | } | 751 | lowmem_pfn_init(); |
| 731 | if (highmem_pages) { | 752 | else |
| 732 | if (max_low_pfn - highmem_pages < | 753 | highmem_pfn_init(); |
| 733 | 64*1024*1024/PAGE_SIZE){ | ||
| 734 | printk(KERN_ERR "highmem size %uMB results in " | ||
| 735 | "smaller than 64MB lowmem, ignoring it.\n" | ||
| 736 | , pages_to_mb(highmem_pages)); | ||
| 737 | highmem_pages = 0; | ||
| 738 | } | ||
| 739 | max_low_pfn -= highmem_pages; | ||
| 740 | } | ||
| 741 | #else | ||
| 742 | if (highmem_pages) | ||
| 743 | printk(KERN_ERR "ignoring highmem size on non-highmem" | ||
| 744 | " kernel!\n"); | ||
| 745 | #endif | ||
| 746 | } | ||
| 747 | } | 754 | } |
| 748 | 755 | ||
| 749 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 756 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| @@ -769,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn, | |||
| 769 | #ifdef CONFIG_FLATMEM | 776 | #ifdef CONFIG_FLATMEM |
| 770 | max_mapnr = num_physpages; | 777 | max_mapnr = num_physpages; |
| 771 | #endif | 778 | #endif |
| 779 | __vmalloc_start_set = true; | ||
| 780 | |||
| 772 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 781 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
| 773 | pages_to_mb(max_low_pfn)); | 782 | pages_to_mb(max_low_pfn)); |
| 774 | 783 | ||
| @@ -790,176 +799,66 @@ static void __init zone_sizes_init(void) | |||
| 790 | free_area_init_nodes(max_zone_pfns); | 799 | free_area_init_nodes(max_zone_pfns); |
| 791 | } | 800 | } |
| 792 | 801 | ||
| 802 | static unsigned long __init setup_node_bootmem(int nodeid, | ||
| 803 | unsigned long start_pfn, | ||
| 804 | unsigned long end_pfn, | ||
| 805 | unsigned long bootmap) | ||
| 806 | { | ||
| 807 | unsigned long bootmap_size; | ||
| 808 | |||
| 809 | /* don't touch min_low_pfn */ | ||
| 810 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | ||
| 811 | bootmap >> PAGE_SHIFT, | ||
| 812 | start_pfn, end_pfn); | ||
| 813 | printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", | ||
| 814 | nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 815 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | ||
| 816 | nodeid, bootmap, bootmap + bootmap_size); | ||
| 817 | free_bootmem_with_active_regions(nodeid, end_pfn); | ||
| 818 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 819 | |||
| 820 | return bootmap + bootmap_size; | ||
| 821 | } | ||
| 822 | |||
| 793 | void __init setup_bootmem_allocator(void) | 823 | void __init setup_bootmem_allocator(void) |
| 794 | { | 824 | { |
| 795 | int i; | 825 | int nodeid; |
| 796 | unsigned long bootmap_size, bootmap; | 826 | unsigned long bootmap_size, bootmap; |
| 797 | /* | 827 | /* |
| 798 | * Initialize the boot-time allocator (with low memory only): | 828 | * Initialize the boot-time allocator (with low memory only): |
| 799 | */ | 829 | */ |
| 800 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; | 830 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
| 801 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 831 | bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
| 802 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, | ||
| 803 | PAGE_SIZE); | 832 | PAGE_SIZE); |
| 804 | if (bootmap == -1L) | 833 | if (bootmap == -1L) |
| 805 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 834 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
| 806 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 835 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
| 807 | 836 | ||
| 808 | /* don't touch min_low_pfn */ | ||
| 809 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | ||
| 810 | min_low_pfn, max_low_pfn); | ||
| 811 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 837 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
| 812 | max_pfn_mapped<<PAGE_SHIFT); | 838 | max_pfn_mapped<<PAGE_SHIFT); |
| 813 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | 839 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
| 814 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | ||
| 815 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | ||
| 816 | bootmap, bootmap + bootmap_size); | ||
| 817 | for_each_online_node(i) | ||
| 818 | free_bootmem_with_active_regions(i, max_low_pfn); | ||
| 819 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
| 820 | |||
| 821 | after_init_bootmem = 1; | ||
| 822 | } | ||
| 823 | |||
| 824 | static void __init find_early_table_space(unsigned long end, int use_pse) | ||
| 825 | { | ||
| 826 | unsigned long puds, pmds, ptes, tables, start; | ||
| 827 | |||
| 828 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | ||
| 829 | tables = PAGE_ALIGN(puds * sizeof(pud_t)); | ||
| 830 | |||
| 831 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | ||
| 832 | tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); | ||
| 833 | |||
| 834 | if (use_pse) { | ||
| 835 | unsigned long extra; | ||
| 836 | 840 | ||
| 837 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | 841 | for_each_online_node(nodeid) { |
| 838 | extra += PMD_SIZE; | 842 | unsigned long start_pfn, end_pfn; |
| 839 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 840 | } else | ||
| 841 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 842 | 843 | ||
| 843 | tables += PAGE_ALIGN(ptes * sizeof(pte_t)); | 844 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
| 844 | 845 | start_pfn = node_start_pfn[nodeid]; | |
| 845 | /* for fixmap */ | 846 | end_pfn = node_end_pfn[nodeid]; |
| 846 | tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); | 847 | if (start_pfn > max_low_pfn) |
| 847 | 848 | continue; | |
| 848 | /* | 849 | if (end_pfn > max_low_pfn) |
| 849 | * RED-PEN putting page tables only on node 0 could | 850 | end_pfn = max_low_pfn; |
| 850 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
| 851 | * need roughly 0.5KB per GB. | ||
| 852 | */ | ||
| 853 | start = 0x7000; | ||
| 854 | table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
| 855 | tables, PAGE_SIZE); | ||
| 856 | if (table_start == -1UL) | ||
| 857 | panic("Cannot find space for the kernel page tables"); | ||
| 858 | |||
| 859 | table_start >>= PAGE_SHIFT; | ||
| 860 | table_end = table_start; | ||
| 861 | table_top = table_start + (tables>>PAGE_SHIFT); | ||
| 862 | |||
| 863 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
| 864 | end, table_start << PAGE_SHIFT, | ||
| 865 | (table_start << PAGE_SHIFT) + tables); | ||
| 866 | } | ||
| 867 | |||
| 868 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
| 869 | unsigned long end) | ||
| 870 | { | ||
| 871 | pgd_t *pgd_base = swapper_pg_dir; | ||
| 872 | unsigned long start_pfn, end_pfn; | ||
| 873 | unsigned long big_page_start; | ||
| 874 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 875 | /* | ||
| 876 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
| 877 | * This will simplify cpa(), which otherwise needs to support splitting | ||
| 878 | * large pages into small in interrupt context, etc. | ||
| 879 | */ | ||
| 880 | int use_pse = 0; | ||
| 881 | #else | 851 | #else |
| 882 | int use_pse = cpu_has_pse; | 852 | start_pfn = 0; |
| 883 | #endif | 853 | end_pfn = max_low_pfn; |
| 884 | |||
| 885 | /* | ||
| 886 | * Find space for the kernel direct mapping tables. | ||
| 887 | */ | ||
| 888 | if (!after_init_bootmem) | ||
| 889 | find_early_table_space(end, use_pse); | ||
| 890 | |||
| 891 | #ifdef CONFIG_X86_PAE | ||
| 892 | set_nx(); | ||
| 893 | if (nx_enabled) | ||
| 894 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
| 895 | #endif | 854 | #endif |
| 896 | 855 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | |
| 897 | /* Enable PSE if available */ | 856 | bootmap); |
| 898 | if (cpu_has_pse) | ||
| 899 | set_in_cr4(X86_CR4_PSE); | ||
| 900 | |||
| 901 | /* Enable PGE if available */ | ||
| 902 | if (cpu_has_pge) { | ||
| 903 | set_in_cr4(X86_CR4_PGE); | ||
| 904 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
| 905 | } | ||
| 906 | |||
| 907 | /* | ||
| 908 | * Don't use a large page for the first 2/4MB of memory | ||
| 909 | * because there are often fixed size MTRRs in there | ||
| 910 | * and overlapping MTRRs into large pages can cause | ||
| 911 | * slowdowns. | ||
| 912 | */ | ||
| 913 | big_page_start = PMD_SIZE; | ||
| 914 | |||
| 915 | if (start < big_page_start) { | ||
| 916 | start_pfn = start >> PAGE_SHIFT; | ||
| 917 | end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); | ||
| 918 | } else { | ||
| 919 | /* head is not big page alignment ? */ | ||
| 920 | start_pfn = start >> PAGE_SHIFT; | ||
| 921 | end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
| 922 | << (PMD_SHIFT - PAGE_SHIFT); | ||
| 923 | } | 857 | } |
| 924 | if (start_pfn < end_pfn) | ||
| 925 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); | ||
| 926 | |||
| 927 | /* big page range */ | ||
| 928 | start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
| 929 | << (PMD_SHIFT - PAGE_SHIFT); | ||
| 930 | if (start_pfn < (big_page_start >> PAGE_SHIFT)) | ||
| 931 | start_pfn = big_page_start >> PAGE_SHIFT; | ||
| 932 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
| 933 | if (start_pfn < end_pfn) | ||
| 934 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, | ||
| 935 | use_pse); | ||
| 936 | |||
| 937 | /* tail is not big page alignment ? */ | ||
| 938 | start_pfn = end_pfn; | ||
| 939 | if (start_pfn > (big_page_start>>PAGE_SHIFT)) { | ||
| 940 | end_pfn = end >> PAGE_SHIFT; | ||
| 941 | if (start_pfn < end_pfn) | ||
| 942 | kernel_physical_mapping_init(pgd_base, start_pfn, | ||
| 943 | end_pfn, 0); | ||
| 944 | } | ||
| 945 | |||
| 946 | early_ioremap_page_table_range_init(pgd_base); | ||
| 947 | |||
| 948 | load_cr3(swapper_pg_dir); | ||
| 949 | |||
| 950 | __flush_tlb_all(); | ||
| 951 | 858 | ||
| 952 | if (!after_init_bootmem) | 859 | after_bootmem = 1; |
| 953 | reserve_early(table_start << PAGE_SHIFT, | ||
| 954 | table_end << PAGE_SHIFT, "PGTABLE"); | ||
| 955 | |||
| 956 | if (!after_init_bootmem) | ||
| 957 | early_memtest(start, end); | ||
| 958 | |||
| 959 | return end >> PAGE_SHIFT; | ||
| 960 | } | 860 | } |
| 961 | 861 | ||
| 962 | |||
| 963 | /* | 862 | /* |
| 964 | * paging_init() sets up the page tables - note that the first 8MB are | 863 | * paging_init() sets up the page tables - note that the first 8MB are |
| 965 | * already mapped by head.S. | 864 | * already mapped by head.S. |
| @@ -1193,52 +1092,6 @@ void mark_rodata_ro(void) | |||
| 1193 | } | 1092 | } |
| 1194 | #endif | 1093 | #endif |
| 1195 | 1094 | ||
| 1196 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | ||
| 1197 | { | ||
| 1198 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 1199 | /* | ||
| 1200 | * If debugging page accesses then do not free this memory but | ||
| 1201 | * mark them not present - any buggy init-section access will | ||
| 1202 | * create a kernel page fault: | ||
| 1203 | */ | ||
| 1204 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | ||
| 1205 | begin, PAGE_ALIGN(end)); | ||
| 1206 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | ||
| 1207 | #else | ||
| 1208 | unsigned long addr; | ||
| 1209 | |||
| 1210 | /* | ||
| 1211 | * We just marked the kernel text read only above, now that | ||
| 1212 | * we are going to free part of that, we need to make that | ||
| 1213 | * writeable first. | ||
| 1214 | */ | ||
| 1215 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | ||
| 1216 | |||
| 1217 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | ||
| 1218 | ClearPageReserved(virt_to_page(addr)); | ||
| 1219 | init_page_count(virt_to_page(addr)); | ||
| 1220 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); | ||
| 1221 | free_page(addr); | ||
| 1222 | totalram_pages++; | ||
| 1223 | } | ||
| 1224 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | ||
| 1225 | #endif | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | void free_initmem(void) | ||
| 1229 | { | ||
| 1230 | free_init_pages("unused kernel memory", | ||
| 1231 | (unsigned long)(&__init_begin), | ||
| 1232 | (unsigned long)(&__init_end)); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | #ifdef CONFIG_BLK_DEV_INITRD | ||
| 1236 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
| 1237 | { | ||
| 1238 | free_init_pages("initrd memory", start, end); | ||
| 1239 | } | ||
| 1240 | #endif | ||
| 1241 | |||
| 1242 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 1095 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
| 1243 | int flags) | 1096 | int flags) |
| 1244 | { | 1097 | { |
