author     Linus Torvalds <torvalds@linux-foundation.org>  2013-02-23 20:50:35 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-02-23 20:50:35 -0500
commit     5ce1a70e2f00f0bce0cab57f798ca354b9496169 (patch)
tree       6e80200536b7a3576fd71ff2c7135ffe87dc858e /arch
parent     9d3cae26acb471d5954cfdc25d1438b32060babe (diff)
parent     ef53d16cded7f89b3843b7a96970dab897843ea5 (diff)
Merge branch 'akpm' (more incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- A little DM fix
- the MM queue
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (154 commits)
ksm: allocate roots when needed
mm: cleanup "swapcache" in do_swap_page
mm,ksm: swapoff might need to copy
mm,ksm: FOLL_MIGRATION do migration_entry_wait
ksm: shrink 32-bit rmap_item back to 32 bytes
ksm: treat unstable nid like in stable tree
ksm: add some comments
tmpfs: fix mempolicy object leaks
tmpfs: fix use-after-free of mempolicy object
mm/fadvise.c: drain all pagevecs if POSIX_FADV_DONTNEED fails to discard all pages
mm: export mmu notifier invalidates
mm: accelerate mm_populate() treatment of THP pages
mm: use long type for page counts in mm_populate() and get_user_pages()
mm: accurately document nr_free_*_pages functions with code comments
HWPOISON: change order of error_states[]'s elements
HWPOISON: fix misjudgement of page_action() for errors on mlocked pages
memcg: stop warning on memcg_propagate_kmem
net: change type of virtio_chan->p9_max_pages
vmscan: change type of vm_total_pages to unsigned long
fs/nfsd: change type of max_delegations, nfsd_drc_max_mem and nfsd_drc_mem_used
...
Diffstat (limited to 'arch')

 -rw-r--r--  arch/arm64/mm/mmu.c                  |   3
 -rw-r--r--  arch/ia64/mm/contig.c                |   2
 -rw-r--r--  arch/ia64/mm/discontig.c             |   6
 -rw-r--r--  arch/ia64/mm/init.c                  |  18
 -rw-r--r--  arch/powerpc/mm/init_64.c            |   5
 -rw-r--r--  arch/powerpc/mm/mem.c                |  12
 -rw-r--r--  arch/s390/mm/init.c                  |  12
 -rw-r--r--  arch/s390/mm/vmem.c                  |   4
 -rw-r--r--  arch/sh/mm/init.c                    |  17
 -rw-r--r--  arch/sparc/mm/init_32.c              |   2
 -rw-r--r--  arch/sparc/mm/init_64.c              |   5
 -rw-r--r--  arch/tile/mm/elf.c                   |   1
 -rw-r--r--  arch/tile/mm/init.c                  |   8
 -rw-r--r--  arch/tile/mm/pgtable.c               |   2
 -rw-r--r--  arch/x86/include/asm/numa.h          |   4
 -rw-r--r--  arch/x86/include/asm/pgtable_types.h |   1
 -rw-r--r--  arch/x86/kernel/acpi/boot.c          |   4
 -rw-r--r--  arch/x86/kernel/setup.c              |  13
 -rw-r--r--  arch/x86/mm/init_32.c                |  12
 -rw-r--r--  arch/x86/mm/init_64.c                | 397
 -rw-r--r--  arch/x86/mm/numa.c                   |  17
 -rw-r--r--  arch/x86/mm/pageattr.c               |  47
 -rw-r--r--  arch/x86/mm/srat.c                   | 125

 23 files changed, 673 insertions, 44 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f4dd585898c5..224b44ab534e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -434,4 +434,7 @@ int __meminit vmemmap_populate(struct page *start_page,
         return 0;
 }
 #endif  /* CONFIG_ARM64_64K_PAGES */
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
 #endif  /* CONFIG_SPARSEMEM_VMEMMAP */
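Note: vmemmap_free() is the new teardown counterpart to vmemmap_populate() that
every CONFIG_SPARSEMEM_VMEMMAP architecture is expected to provide after this
series; an empty stub, as above, is the legitimate answer where vmemmap teardown
is not implemented. A sketch of the interface, with the signature taken from
the hunks in this merge:

        /* Tear down the vmemmap backing nr_pages page structs at memmap. */
        void vmemmap_free(struct page *memmap, unsigned long nr_pages);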
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 1516d1dc11fd..80dab509dfb0 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -93,7 +93,7 @@ void show_mem(unsigned int filter)
         printk(KERN_INFO "%d pages swap cached\n", total_cached);
         printk(KERN_INFO "Total of %ld pages in page table cache\n",
                quicklist_total_size());
-        printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
+        printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
 }
 
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c641333cd997..c2e955ee79a8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -666,7 +666,7 @@ void show_mem(unsigned int filter)
         printk(KERN_INFO "%d pages swap cached\n", total_cached);
         printk(KERN_INFO "Total of %ld pages in page table cache\n",
                quicklist_total_size());
-        printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
+        printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
 }
 
 /**
@@ -822,4 +822,8 @@ int __meminit vmemmap_populate(struct page *start_page,
 {
         return vmemmap_populate_basepages(start_page, size, node);
 }
+
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
 #endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b755ea92aea7..20bc967c7209 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -688,6 +688,24 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
         return ret;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+        int ret;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        ret = __remove_pages(zone, start_pfn, nr_pages);
+        if (ret)
+                pr_warn("%s: Problem encountered in __remove_pages() as"
+                        " ret=%d\n", __func__, ret);
+
+        return ret;
+}
+#endif
 #endif
 
 /*
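Note: the same arch_remove_memory() shape recurs in the powerpc, sh, x86_32 and
x86_64 hunks below, differing only in error handling and in whether the direct
mapping must also be torn down. A condensed sketch of the shared pattern, using
the identifiers from the hunk above:

        /* Convert the byte range to a pfn range, look up its zone, and let
         * the generic code tear down the memory sections. */
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct zone *zone = page_zone(pfn_to_page(start_pfn));

        return __remove_pages(zone, start_pfn, nr_pages);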
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 95a45293e5ac..7e2246fb2f31 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -297,5 +297,10 @@ int __meminit vmemmap_populate(struct page *start_page,
 
         return 0;
 }
+
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 40df7c8f2096..f1f7409a4183 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -133,6 +133,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
         return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ae672f41c464..49ce6bb2c641 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -228,4 +228,16 @@ int arch_add_memory(int nid, u64 start, u64 size)
         vmem_remove_mapping(start, size);
         return rc;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        /*
+         * There is no hardware or firmware interface which could trigger a
+         * hot memory remove on s390. So there is nothing that needs to be
+         * implemented.
+         */
+        return -EBUSY;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 79699f46a443..e21aaf4f5cb6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -268,6 +268,10 @@ out:
         return ret;
 }
 
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 /*
  * Add memory segment to the segment list if it doesn't overlap with
  * an already present segment.
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 82cc576fab15..105794037143 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -558,4 +558,21 @@ int memory_add_physaddr_to_nid(u64 addr)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+        int ret;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        ret = __remove_pages(zone, start_pfn, nr_pages);
+        if (unlikely(ret))
+                pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
+                        ret);
+
+        return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index dde85ef1c56d..48e0c030e8f5 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -57,7 +57,7 @@ void show_mem(unsigned int filter)
         printk("Mem-info:\n");
         show_free_areas(filter);
         printk("Free swap: %6ldkB\n",
-               nr_swap_pages << (PAGE_SHIFT-10));
+               get_nr_swap_pages() << (PAGE_SHIFT-10));
         printk("%ld pages of RAM\n", totalram_pages);
         printk("%ld free pages\n", nr_free_pages());
 }
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 5c2c6e61facb..1588d33d5492 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2235,6 +2235,11 @@ void __meminit vmemmap_populate_print_last(void)
                 node_start = 0;
         }
 }
+
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 3cfa98bf9125..743c951c61b0 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -130,7 +130,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
         if (!retval) {
                 unsigned long addr = MEM_USER_INTRPT;
                 addr = mmap_region(NULL, addr, INTRPT_SIZE,
-                                   MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
                                    VM_READ|VM_EXEC|
                                    VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
                 if (addr > (unsigned long) -PAGE_SIZE)
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index ef29d6c5e10e..2749515a0547 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -935,6 +935,14 @@ int remove_memory(u64 start, u64 size)
 {
         return -EINVAL;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        /* TODO */
+        return -EBUSY;
+}
+#endif
 #endif
 
 struct kmem_cache *pgd_cache;
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index de0de0c0e8a1..b3b4972c2451 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -61,7 +61,7 @@ void show_mem(unsigned int filter)
                global_page_state(NR_PAGETABLE),
                global_page_state(NR_BOUNCE),
                global_page_state(NR_FILE_PAGES),
-               nr_swap_pages);
+               get_nr_swap_pages());
 
         for_each_zone(zone) {
                 unsigned long flags, order, total = 0, largest_order = -1;
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 52560a2038e1..1b99ee5c9f00 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -57,8 +57,8 @@ static inline int numa_cpu_node(int cpu)
 #endif
 
 #ifdef CONFIG_NUMA
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
+extern void numa_set_node(int cpu, int node);
+extern void numa_clear_node(int cpu);
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e6423002c10b..567b5d0632b2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
  * as a pte too.
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 
 #endif  /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index cfc755dc1607..230c8ea878e5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -696,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
+#ifdef CONFIG_ACPI_NUMA
+        set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
+#endif
+
         per_cpu(x86_cpu_to_apicid, cpu) = -1;
         set_cpu_present(cpu, false);
         num_processors--;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 915f5efefcf5..9c857f05cef0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p)
         setup_bios_corruption_check();
 #endif
 
+        /*
+         * In the memory hotplug case, the kernel needs info from SRAT to
+         * determine which memory is hotpluggable before allocating memory
+         * using memblock.
+         */
+        acpi_boot_table_init();
+        early_acpi_boot_init();
+        early_parse_srat();
+
 #ifdef CONFIG_X86_32
         printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
                (max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1101,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
         /*
          * Parse the ACPI tables for possible boot-time SMP configuration.
         */
-        acpi_boot_table_init();
-
-        early_acpi_boot_init();
-
         initmem_init();
         memblock_find_dma_reserve();
 
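Note: the two setup.c hunks are an ordering change, not new logic: ACPI table
setup and SRAT parsing now run before initmem_init() and before large memblock
allocations, so hotpluggable ranges are known early enough to be avoided. A
condensed, abridged sketch of the resulting setup_arch() sequence (surrounding
calls omitted):

        setup_bios_corruption_check();
        acpi_boot_table_init();         /* moved earlier */
        early_acpi_boot_init();         /* moved earlier */
        early_parse_srat();             /* new: record hotpluggable ranges */
        ...
        initmem_init();                 /* NUMA setup now sees SRAT data */
        memblock_find_dma_reserve();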
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b299724f6e34..2d19001151d5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
         return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
 #endif
 
 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3eba7f429880..474e28f10815 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -707,6 +707,343 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+        struct zone *zone;
+        bool bootmem = false;
+        unsigned long magic;
+        unsigned int nr_pages = 1 << order;
+
+        /* bootmem page has reserved flag */
+        if (PageReserved(page)) {
+                __ClearPageReserved(page);
+                bootmem = true;
+
+                magic = (unsigned long)page->lru.next;
+                if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+                        while (nr_pages--)
+                                put_page_bootmem(page++);
+                } else
+                        __free_pages_bootmem(page, order);
+        } else
+                free_pages((unsigned long)page_address(page), order);
+
+        /*
+         * SECTION_INFO pages and MIX_SECTION_INFO pages
+         * are all allocated by bootmem.
+         */
+        if (bootmem) {
+                zone = page_zone(page);
+                zone_span_writelock(zone);
+                zone->present_pages += nr_pages;
+                zone_span_writeunlock(zone);
+                totalram_pages += nr_pages;
+        }
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+        pte_t *pte;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                pte = pte_start + i;
+                if (pte_val(*pte))
+                        return;
+        }
+
+        /* free a pte table */
+        free_pagetable(pmd_page(*pmd), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pmd_clear(pmd);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+        pmd_t *pmd;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PMD; i++) {
+                pmd = pmd_start + i;
+                if (pmd_val(*pmd))
+                        return;
+        }
+
+        /* free a pmd table */
+        free_pagetable(pud_page(*pud), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pud_clear(pud);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+        pud_t *pud;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PUD; i++) {
+                pud = pud_start + i;
+                if (pud_val(*pud))
+                        return false;
+        }
+
+        /* free a pud table */
+        free_pagetable(pgd_page(*pgd), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pgd_clear(pgd);
+        spin_unlock(&init_mm.page_table_lock);
+
+        return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pte_t *pte;
+        void *page_addr;
+        phys_addr_t phys_addr;
+
+        pte = pte_start + pte_index(addr);
+        for (; addr < end; addr = next, pte++) {
+                next = (addr + PAGE_SIZE) & PAGE_MASK;
+                if (next > end)
+                        next = end;
+
+                if (!pte_present(*pte))
+                        continue;
+
+                /*
+                 * We mapped [0,1G) memory as identity mapping when
+                 * initializing, in arch/x86/kernel/head_64.S. These
+                 * pagetables cannot be removed.
+                 */
+                phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+                if (phys_addr < (phys_addr_t)0x40000000)
+                        return;
+
+                if (IS_ALIGNED(addr, PAGE_SIZE) &&
+                    IS_ALIGNED(next, PAGE_SIZE)) {
+                        /*
+                         * Do not free direct mapping pages since they were
+                         * freed when offlining, or simply not in use.
+                         */
+                        if (!direct)
+                                free_pagetable(pte_page(*pte), 0);
+
+                        spin_lock(&init_mm.page_table_lock);
+                        pte_clear(&init_mm, addr, pte);
+                        spin_unlock(&init_mm.page_table_lock);
+
+                        /* For non-direct mapping, pages mean nothing. */
+                        pages++;
+                } else {
+                        /*
+                         * If we are here, we are freeing vmemmap pages since
+                         * direct mapped memory ranges to be freed are aligned.
+                         *
+                         * If we are not removing the whole page, it means
+                         * other page structs in this page are being used and
+                         * we cannot remove them. So fill the unused
+                         * page_structs with 0xFD, and remove the page when it
+                         * is wholly filled with 0xFD.
+                         */
+                        memset((void *)addr, PAGE_INUSE, next - addr);
+
+                        page_addr = page_address(pte_page(*pte));
+                        if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+                                free_pagetable(pte_page(*pte), 0);
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pte_clear(&init_mm, addr, pte);
+                                spin_unlock(&init_mm.page_table_lock);
+                        }
+                }
+        }
+
+        /* Call free_pte_table() in remove_pmd_table(). */
+        flush_tlb_all();
+        if (direct)
+                update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pte_t *pte_base;
+        pmd_t *pmd;
+        void *page_addr;
+
+        pmd = pmd_start + pmd_index(addr);
+        for (; addr < end; addr = next, pmd++) {
+                next = pmd_addr_end(addr, end);
+
+                if (!pmd_present(*pmd))
+                        continue;
+
+                if (pmd_large(*pmd)) {
+                        if (IS_ALIGNED(addr, PMD_SIZE) &&
+                            IS_ALIGNED(next, PMD_SIZE)) {
+                                if (!direct)
+                                        free_pagetable(pmd_page(*pmd),
+                                                       get_order(PMD_SIZE));
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pmd_clear(pmd);
+                                spin_unlock(&init_mm.page_table_lock);
+                                pages++;
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pmd_page(*pmd));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PMD_SIZE)) {
+                                        free_pagetable(pmd_page(*pmd),
+                                                       get_order(PMD_SIZE));
+
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pmd_clear(pmd);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+
+                        continue;
+                }
+
+                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+                remove_pte_table(pte_base, addr, next, direct);
+                free_pte_table(pte_base, pmd);
+        }
+
+        /* Call free_pmd_table() in remove_pud_table(). */
+        if (direct)
+                update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pmd_t *pmd_base;
+        pud_t *pud;
+        void *page_addr;
+
+        pud = pud_start + pud_index(addr);
+        for (; addr < end; addr = next, pud++) {
+                next = pud_addr_end(addr, end);
+
+                if (!pud_present(*pud))
+                        continue;
+
+                if (pud_large(*pud)) {
+                        if (IS_ALIGNED(addr, PUD_SIZE) &&
+                            IS_ALIGNED(next, PUD_SIZE)) {
+                                if (!direct)
+                                        free_pagetable(pud_page(*pud),
+                                                       get_order(PUD_SIZE));
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pud_clear(pud);
+                                spin_unlock(&init_mm.page_table_lock);
+                                pages++;
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pud_page(*pud));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PUD_SIZE)) {
+                                        free_pagetable(pud_page(*pud),
+                                                       get_order(PUD_SIZE));
+
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pud_clear(pud);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+
+                        continue;
+                }
+
+                pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+                remove_pmd_table(pmd_base, addr, next, direct);
+                free_pmd_table(pmd_base, pud);
+        }
+
+        if (direct)
+                update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual address. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+        unsigned long next;
+        pgd_t *pgd;
+        pud_t *pud;
+        bool pgd_changed = false;
+
+        for (; start < end; start = next) {
+                next = pgd_addr_end(start, end);
+
+                pgd = pgd_offset_k(start);
+                if (!pgd_present(*pgd))
+                        continue;
+
+                pud = (pud_t *)pgd_page_vaddr(*pgd);
+                remove_pud_table(pud, start, next, direct);
+                if (free_pud_table(pud, pgd))
+                        pgd_changed = true;
+        }
+
+        if (pgd_changed)
+                sync_global_pgds(start, end - 1);
+
+        flush_tlb_all();
+}
+
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+        unsigned long start = (unsigned long)memmap;
+        unsigned long end = (unsigned long)(memmap + nr_pages);
+
+        remove_pagetable(start, end, false);
+}
+
+static void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+        start = (unsigned long)__va(start);
+        end = (unsigned long)__va(end);
+
+        remove_pagetable(start, end, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+        int ret;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        kernel_physical_mapping_remove(start, start + size);
+        ret = __remove_pages(zone, start_pfn, nr_pages);
+        WARN_ON_ONCE(ret);
+
+        return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
@@ -1019,6 +1356,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
         return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+                                  struct page *start_page, unsigned long size)
+{
+        unsigned long addr = (unsigned long)start_page;
+        unsigned long end = (unsigned long)(start_page + size);
+        unsigned long next;
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        unsigned int nr_pages;
+        struct page *page;
+
+        for (; addr < end; addr = next) {
+                pte_t *pte = NULL;
+
+                pgd = pgd_offset_k(addr);
+                if (pgd_none(*pgd)) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        continue;
+                }
+                get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+                pud = pud_offset(pgd, addr);
+                if (pud_none(*pud)) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        continue;
+                }
+                get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+                if (!cpu_has_pse) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        pmd = pmd_offset(pud, addr);
+                        if (pmd_none(*pmd))
+                                continue;
+                        get_page_bootmem(section_nr, pmd_page(*pmd),
+                                         MIX_SECTION_INFO);
+
+                        pte = pte_offset_kernel(pmd, addr);
+                        if (pte_none(*pte))
+                                continue;
+                        get_page_bootmem(section_nr, pte_page(*pte),
+                                         SECTION_INFO);
+                } else {
+                        next = pmd_addr_end(addr, end);
+
+                        pmd = pmd_offset(pud, addr);
+                        if (pmd_none(*pmd))
+                                continue;
+
+                        nr_pages = 1 << (get_order(PMD_SIZE));
+                        page = pmd_page(*pmd);
+                        while (nr_pages--)
+                                get_page_bootmem(section_nr, page++,
+                                                 SECTION_INFO);
+                }
+        }
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
         if (p_start) {
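Note: arch_remove_memory() above is the architecture half of memory hot-remove;
the generic half lives in mm/memory_hotplug.c. A hedged sketch of how the
generic code of this era is expected to reach the new hook (simplified, locking
and error paths trimmed, so treat the exact calling sequence as an assumption
rather than part of this diff):

        /* mm/memory_hotplug.c (sketch, not part of this patch) */
        int remove_memory(u64 start, u64 size)
        {
                int ret;

                lock_memory_hotplug();
                /* Tears down the direct mapping, then the sections. */
                ret = arch_remove_memory(start, size);
                unlock_memory_hotplug();

                return ret;
        }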
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8504f3698753..dfd30259eb89 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,7 +56,7 @@ early_param("numa", numa_setup);
 /*
  * apicid, cpu, node mappings
  */
-s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 __apicid_to_node[MAX_LOCAL_APIC] = {
         [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
 {
         int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
@@ -101,7 +101,7 @@ void __cpuinit numa_set_node(int cpu, int node)
         set_cpu_numa_node(cpu, node);
 }
 
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
 {
         numa_set_node(cpu, NUMA_NO_NODE);
 }
@@ -213,10 +213,9 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
          * Allocate node data. Try node-local memory and then any node.
          * Never allocate in DMA zone.
          */
-        nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
         if (!nd_pa) {
-                pr_err("Cannot find %zu bytes in node %d\n",
-                       nd_size, nid);
+                pr_err("Cannot find %zu bytes in any node\n", nd_size);
                 return;
         }
         nd = __va(nd_pa);
@@ -561,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
         for (i = 0; i < MAX_LOCAL_APIC; i++)
                 set_apicid_to_node(i, NUMA_NO_NODE);
 
-        nodes_clear(numa_nodes_parsed);
+        /*
+         * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
+         * SRAT was parsed earlier in early_parse_srat().
+         */
         nodes_clear(node_possible_map);
         nodes_clear(node_online_map);
-        memset(&numa_meminfo, 0, sizeof(numa_meminfo));
         WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
         numa_reset_distance();
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a1b1c88f9caf..ca1f1c2bb7be 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -529,21 +529,13 @@ out_unlock:
         return do_split;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
 {
         unsigned long pfn, pfninc = 1;
         unsigned int i, level;
-        pte_t *pbase, *tmp;
+        pte_t *tmp;
         pgprot_t ref_prot;
-        struct page *base;
-
-        if (!debug_pagealloc)
-                spin_unlock(&cpa_lock);
-        base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-        if (!debug_pagealloc)
-                spin_lock(&cpa_lock);
-        if (!base)
-                return -ENOMEM;
+        struct page *base = virt_to_page(pbase);
 
         spin_lock(&pgd_lock);
         /*
@@ -551,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
          * up for us already:
          */
         tmp = lookup_address(address, &level);
-        if (tmp != kpte)
-                goto out_unlock;
+        if (tmp != kpte) {
+                spin_unlock(&pgd_lock);
+                return 1;
+        }
 
-        pbase = (pte_t *)page_address(base);
         paravirt_alloc_pte(&init_mm, page_to_pfn(base));
         ref_prot = pte_pgprot(pte_clrhuge(*kpte));
         /*
@@ -601,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
          * going on.
          */
         __flush_tlb_all();
+        spin_unlock(&pgd_lock);
 
-        base = NULL;
+        return 0;
+}
 
-out_unlock:
-        /*
-         * If we dropped out via the lookup_address check under
-         * pgd_lock then stick the page back into the pool:
-         */
-        if (base)
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+        pte_t *pbase;
+        struct page *base;
+
+        if (!debug_pagealloc)
+                spin_unlock(&cpa_lock);
+        base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+        if (!debug_pagealloc)
+                spin_lock(&cpa_lock);
+        if (!base)
+                return -ENOMEM;
+
+        pbase = (pte_t *)page_address(base);
+        if (__split_large_page(kpte, address, pbase))
                 __free_page(base);
-        spin_unlock(&pgd_lock);
 
         return 0;
 }
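Note: the pageattr.c change is a pure refactor: the page allocation (which may
drop cpa_lock) is hoisted out of the locked splitting work, so other callers
can split a large page using a preallocated PTE page. A usage sketch mirroring
the new split_large_page() above (a non-zero return means the split raced with
another CPU and the page was not consumed):

        /* Caller supplies the page that will hold the new PTE table. */
        struct page *base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
        if (base) {
                pte_t *pbase = (pte_t *)page_address(base);
                if (__split_large_page(kpte, address, pbase))
                        __free_page(base);      /* split raced; page unused */
        }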
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index cdd0da9dd530..79836d01f789 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,11 +141,126 @@ static inline int save_add_info(void) {return 1;}
 static inline int save_add_info(void) {return 0;}
 #endif
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static void __init
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+        int overlap, i;
+        unsigned long start_pfn, end_pfn;
+
+        start_pfn = PFN_DOWN(start);
+        end_pfn = PFN_UP(end);
+
+        /*
+         * For movablemem_map=acpi:
+         *
+         * SRAT:            |_____| |_____| |_________| |_________| ......
+         * node id:            0       1        1           2
+         * hotpluggable:       n       y        y           n
+         * movablemem_map:          |_____| |_________|
+         *
+         * Using movablemem_map, we can prevent memblock from allocating
+         * memory on ZONE_MOVABLE at boot time.
+         *
+         * Before parsing SRAT, memblock has already reserved some memory
+         * ranges for other purposes, such as the kernel image. We cannot
+         * prevent the kernel from using this memory, so we need to exclude
+         * it even if it is hotpluggable.
+         * Furthermore, to ensure the kernel has enough memory to boot, we
+         * make all the memory on the node where the kernel resides
+         * un-hotpluggable.
+         */
+        if (hotpluggable && movablemem_map.acpi) {
+                /* Exclude ranges reserved by memblock. */
+                struct memblock_type *rgn = &memblock.reserved;
+
+                for (i = 0; i < rgn->cnt; i++) {
+                        if (end <= rgn->regions[i].base ||
+                            start >= rgn->regions[i].base +
+                            rgn->regions[i].size)
+                                continue;
+
+                        /*
+                         * If the memory range overlaps the memory reserved by
+                         * memblock, then the kernel resides in this node.
+                         */
+                        node_set(node, movablemem_map.numa_nodes_kernel);
+
+                        goto out;
+                }
+
+                /*
+                 * If the kernel resides in this node, then the whole node
+                 * should not be hotpluggable.
+                 */
+                if (node_isset(node, movablemem_map.numa_nodes_kernel))
+                        goto out;
+
+                insert_movablemem_map(start_pfn, end_pfn);
+
+                /*
+                 * numa_nodes_hotplug nodemask represents which nodes are put
+                 * into movablemem_map.map[].
+                 */
+                node_set(node, movablemem_map.numa_nodes_hotplug);
+                goto out;
+        }
+
+        /*
+         * For movablemem_map=nn[KMG]@ss[KMG]:
+         *
+         * SRAT:            |_____| |_____| |_________| |_________| ......
+         * node id:            0       1        1           2
+         * user specified:           |__|               |___|
+         * movablemem_map:           |___| |_________|  |______| ......
+         *
+         * Using movablemem_map, we can prevent memblock from allocating
+         * memory on ZONE_MOVABLE at boot time.
+         *
+         * NOTE: In this case, SRAT info will be ignored.
+         */
+        overlap = movablemem_map_overlap(start_pfn, end_pfn);
+        if (overlap >= 0) {
+                /*
+                 * If part of this range is in movablemem_map, we need to
+                 * add the range after it to extend the range to the end
+                 * of the node, because everything from the minimum address
+                 * specified to the end of the node will be ZONE_MOVABLE.
+                 */
+                start_pfn = max(start_pfn,
+                                movablemem_map.map[overlap].start_pfn);
+                insert_movablemem_map(start_pfn, end_pfn);
+
+                /*
+                 * Set the nodemask, so that if the address range on one node
+                 * is not contiguous, we can add the subsequent ranges on the
+                 * same node into movablemem_map.
+                 */
+                node_set(node, movablemem_map.numa_nodes_hotplug);
+        } else {
+                if (node_isset(node, movablemem_map.numa_nodes_hotplug))
+                        /*
+                         * Insert the range if we already have movable ranges
+                         * on the same node.
+                         */
+                        insert_movablemem_map(start_pfn, end_pfn);
+        }
+out:
+        return;
+}
+#else           /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+static inline void
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+}
+#endif          /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
         u64 start, end;
+        u32 hotpluggable;
         int node, pxm;
 
         if (srat_disabled())
@@ -154,7 +269,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                 goto out_err_bad_srat;
         if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                 goto out_err;
-        if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
+        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
+        if (hotpluggable && !save_add_info())
                 goto out_err;
 
         start = ma->base_address;
@@ -174,9 +290,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
         node_set(node, numa_nodes_parsed);
 
-        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
-               node, pxm,
-               (unsigned long long) start, (unsigned long long) end - 1);
+        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
+               node, pxm,
+               (unsigned long long) start, (unsigned long long) end - 1,
+               hotpluggable ? "Hot Pluggable" : "");
+
+        handle_movablemem(node, start, end, hotpluggable);
 
         return 0;
 out_err_bad_srat:
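Note: the two movablemem_map modes handled by handle_movablemem() correspond to
kernel command-line usage of the following form; the values are illustrative,
and the nn[KMG]@ss[KMG] syntax is taken from the comment in the hunk above:

        movablemem_map=acpi             # trust SRAT hotpluggable flags
        movablemem_map=2G@16G           # 2G of movable memory starting at 16G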