Diffstat (limited to 'arch/x86/mm/init_64.c')
-rw-r--r--	arch/x86/mm/init_64.c | 397 +
1 file changed, 397 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3eba7f429880..474e28f10815 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -707,6 +707,343 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+	struct zone *zone;
+	bool bootmem = false;
+	unsigned long magic;
+	unsigned int nr_pages = 1 << order;
+
+	/* bootmem page has reserved flag */
+	if (PageReserved(page)) {
+		__ClearPageReserved(page);
+		bootmem = true;
+
+		magic = (unsigned long)page->lru.next;
+		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+			while (nr_pages--)
+				put_page_bootmem(page++);
+		} else
+			__free_pages_bootmem(page, order);
+	} else
+		free_pages((unsigned long)page_address(page), order);
+
+	/*
+	 * SECTION_INFO pages and MIX_SECTION_INFO pages
+	 * are all allocated by bootmem.
+	 */
+	if (bootmem) {
+		zone = page_zone(page);
+		zone_span_writelock(zone);
+		zone->present_pages += nr_pages;
+		zone_span_writeunlock(zone);
+		totalram_pages += nr_pages;
+	}
+}
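
The SECTION_INFO/MIX_SECTION_INFO magic that free_pagetable() reads out of page->lru.next is stored when bootmem info is registered for a section. For reference, a sketch of get_page_bootmem() from mm/memory_hotplug.c of roughly this era; shown for context only, not part of this patch, and the exact field usage is an assumption from the surrounding code:

	/* Sketch, not part of this patch: how the bootmem type and
	 * section number are recorded in struct page. */
	void get_page_bootmem(unsigned long info, struct page *page,
			      unsigned long type)
	{
		page->lru.next = (struct list_head *)type; /* SECTION_INFO etc. */
		SetPagePrivate(page);
		set_page_private(page, info);              /* section number */
		atomic_inc(&page->_count);                 /* pin the page */
	}

put_page_bootmem() reverses this, dropping the reference and clearing the private fields, which is why free_pagetable() can hand such pages back one at a time.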
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (pte_val(*pte))
+			return;
+	}
+
+	/* free a pte table */
+	free_pagetable(pmd_page(*pmd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pmd_clear(pmd);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (pmd_val(*pmd))
+			return;
+	}
+
+	/* free a pmd table */
+	free_pagetable(pud_page(*pud), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pud_clear(pud);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (pud_val(*pud))
+			return false;
+	}
+
+	/* free a pud table */
+	free_pagetable(pgd_page(*pgd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pgd_clear(pgd);
+	spin_unlock(&init_mm.page_table_lock);
+
+	return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte;
+	void *page_addr;
+	phys_addr_t phys_addr;
+
+	pte = pte_start + pte_index(addr);
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		/*
+		 * We mapped [0,1G) memory as identity mapping when
+		 * initializing, in arch/x86/kernel/head_64.S. These
+		 * pagetables cannot be removed.
+		 */
+		phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+		if (phys_addr < (phys_addr_t)0x40000000)
+			return;
+
+		if (IS_ALIGNED(addr, PAGE_SIZE) &&
+		    IS_ALIGNED(next, PAGE_SIZE)) {
+			/*
+			 * Do not free direct mapping pages since they were
+			 * freed when offlining, or simply not in use.
+			 */
+			if (!direct)
+				free_pagetable(pte_page(*pte), 0);
+
+			spin_lock(&init_mm.page_table_lock);
+			pte_clear(&init_mm, addr, pte);
+			spin_unlock(&init_mm.page_table_lock);
+
+			/* For non-direct mappings, the pages count is unused. */
+			pages++;
+		} else {
+			/*
+			 * If we are here, we are freeing vmemmap pages since
+			 * direct mapped memory ranges to be freed are aligned.
+			 *
+			 * If we are not removing the whole page, it means
+			 * other page structs in this page are being used and
+			 * we cannot remove them. So fill the unused page
+			 * structs with 0xFD, and remove the page when it is
+			 * wholly filled with 0xFD.
+			 */
+			memset((void *)addr, PAGE_INUSE, next - addr);
+
+			page_addr = page_address(pte_page(*pte));
+			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+				free_pagetable(pte_page(*pte), 0);
+
+				spin_lock(&init_mm.page_table_lock);
+				pte_clear(&init_mm, addr, pte);
+				spin_unlock(&init_mm.page_table_lock);
+			}
+		}
+	}
+
+	/* Call free_pte_table() in remove_pmd_table(). */
+	flush_tlb_all();
+	if (direct)
+		update_page_count(PG_LEVEL_4K, -pages);
+}
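
The PAGE_INUSE bookkeeping above is easy to model outside the kernel: poison only the sub-range being released, and free the backing page once every byte carries the poison. A standalone, hypothetical userspace analogue (memchr_inv() is kernel-only, so a plain loop stands in for it):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define MY_PAGE_SIZE 4096
	#define PAGE_INUSE   0xFD

	/* Userspace stand-in for the kernel's memchr_inv(): true when
	 * every byte of the page equals the poison value. */
	static bool page_fully_poisoned(const unsigned char *page)
	{
		for (size_t i = 0; i < MY_PAGE_SIZE; i++)
			if (page[i] != PAGE_INUSE)
				return false;
		return true;
	}

	int main(void)
	{
		unsigned char *page = calloc(1, MY_PAGE_SIZE);

		/* Release the first half: poison it, keep the page. */
		memset(page, PAGE_INUSE, MY_PAGE_SIZE / 2);
		printf("half poisoned, free page? %d\n",
		       page_fully_poisoned(page));	/* 0 */

		/* Release the second half: the whole page can now go. */
		memset(page + MY_PAGE_SIZE / 2, PAGE_INUSE, MY_PAGE_SIZE / 2);
		printf("fully poisoned, free page? %d\n",
		       page_fully_poisoned(page));	/* 1 */

		free(page);
		return 0;
	}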
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte_base;
+	pmd_t *pmd;
+	void *page_addr;
+
+	pmd = pmd_start + pmd_index(addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_large(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE)) {
+				if (!direct)
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pmd_clear(pmd);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pmd_page(*pmd));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PMD_SIZE)) {
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pmd_clear(pmd);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+		remove_pte_table(pte_base, addr, next, direct);
+		free_pte_table(pte_base, pmd);
+	}
+
+	/* Call free_pmd_table() in remove_pud_table(). */
+	if (direct)
+		update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pmd_t *pmd_base;
+	pud_t *pud;
+	void *page_addr;
+
+	pud = pud_start + pud_index(addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_large(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE)) {
+				if (!direct)
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pud_clear(pud);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pud_page(*pud));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PUD_SIZE)) {
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pud_clear(pud);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+		remove_pmd_table(pmd_base, addr, next, direct);
+		free_pmd_table(pmd_base, pud);
+	}
+
+	if (direct)
+		update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual addresses. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	bool pgd_changed = false;
+
+	for (; start < end; start = next) {
+		next = pgd_addr_end(start, end);
+
+		pgd = pgd_offset_k(start);
+		if (!pgd_present(*pgd))
+			continue;
+
+		pud = (pud_t *)pgd_page_vaddr(*pgd);
+		remove_pud_table(pud, start, next, direct);
+		if (free_pud_table(pud, pgd))
+			pgd_changed = true;
+	}
+
+	if (pgd_changed)
+		sync_global_pgds(start, end - 1);
+
+	flush_tlb_all();
+}
+
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long start = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+
+	remove_pagetable(start, end, false);
+}
+
+static void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
+
+	remove_pagetable(start, end, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	kernel_physical_mapping_remove(start, start + size);
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	WARN_ON_ONCE(ret);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
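
For context, arch_remove_memory() is reached from the generic memory hot-remove path only after the range has been offlined. A purely hypothetical direct call for a single 128 MiB block (the physical address is invented for illustration, and this fragment assumes kernel context) would look like:

	/* Hypothetical illustration, not from this patch: tear down one
	 * previously offlined 128 MiB block at an invented address. */
	u64 start = 0x100000000ULL;	/* physical base of the block */
	u64 size  = 128ULL << 20;	/* 128 MiB */
	int ret   = arch_remove_memory(start, size);
	if (ret)
		pr_err("arch_remove_memory failed: %d\n", ret);

Note the ordering: the direct mapping is torn down first via kernel_physical_mapping_remove(), then __remove_pages() drops the sections and their vmemmap.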
@@ -1019,6 +1356,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned int nr_pages;
+	struct page *page;
+
+	for (; addr < end; addr = next) {
+		pte_t *pte = NULL;
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+		if (!cpu_has_pse) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 MIX_SECTION_INFO);
+
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte))
+				continue;
+			get_page_bootmem(section_nr, pte_page(*pte),
+					 SECTION_INFO);
+		} else {
+			next = pmd_addr_end(addr, end);
+
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+
+			nr_pages = 1 << (get_order(PMD_SIZE));
+			page = pmd_page(*pmd);
+			while (nr_pages--)
+				get_page_bootmem(section_nr, page++,
+						 SECTION_INFO);
+		}
+	}
+}
+#endif
+
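A quick size check shows why the PSE branch above can tag a whole memmap chunk per PMD walk: assuming a 64-byte struct page (approximately right for this era; the exact size is an assumption), the memmap of one 128 MiB section occupies exactly one 2 MiB page. A small standalone check:

	#include <stdio.h>

	int main(void)
	{
		/* Assumed x86_64 numbers; struct page size is approximate. */
		const unsigned long section_bytes  = 128UL << 20; /* 128 MiB */
		const unsigned long page_size      = 4096;
		const unsigned long struct_page_sz = 64;          /* assumed */

		unsigned long pages_per_section = section_bytes / page_size;
		unsigned long memmap_bytes = pages_per_section * struct_page_sz;

		printf("pages per section: %lu\n", pages_per_section); /* 32768 */
		printf("memmap size: %lu MiB\n", memmap_bytes >> 20);  /* 2 */
		return 0;
	}

Without PSE the memmap is mapped with 4 KiB PTEs, so every level's table page gets MIX_SECTION_INFO and each leaf page gets SECTION_INFO individually.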
 void __meminit vmemmap_populate_print_last(void)
 {
 	if (p_start) {