-rw-r--r--  arch/arm/kernel/setup.c | 1
-rw-r--r--  arch/arm/mm/init.c | 4
-rw-r--r--  arch/ia64/Kconfig | 6
-rw-r--r--  arch/ia64/mm/contig.c | 3
-rw-r--r--  arch/ia64/mm/init.c | 4
-rw-r--r--  arch/microblaze/include/asm/memblock.h | 14
-rw-r--r--  arch/microblaze/kernel/prom.c | 3
-rw-r--r--  arch/mips/Kconfig | 6
-rw-r--r--  arch/mips/kernel/setup.c | 3
-rw-r--r--  arch/mips/sgi-ip27/ip27-memory.c | 5
-rw-r--r--  arch/openrisc/include/asm/memblock.h | 24
-rw-r--r--  arch/openrisc/kernel/prom.c | 3
-rw-r--r--  arch/powerpc/Kconfig | 4
-rw-r--r--  arch/powerpc/include/asm/memblock.h | 8
-rw-r--r--  arch/powerpc/kernel/machine_kexec.c | 3
-rw-r--r--  arch/powerpc/kernel/prom.c | 20
-rw-r--r--  arch/powerpc/mm/init_32.c | 4
-rw-r--r--  arch/powerpc/mm/mem.c | 2
-rw-r--r--  arch/powerpc/mm/numa.c | 60
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c | 1
-rw-r--r--  arch/powerpc/platforms/embedded6xx/wii.c | 23
-rw-r--r--  arch/powerpc/platforms/ps3/mm.c | 1
-rw-r--r--  arch/s390/Kconfig | 6
-rw-r--r--  arch/s390/kernel/setup.c | 4
-rw-r--r--  arch/score/Kconfig | 6
-rw-r--r--  arch/score/kernel/setup.c | 4
-rw-r--r--  arch/sh/Kconfig | 1
-rw-r--r--  arch/sh/include/asm/memblock.h | 4
-rw-r--r--  arch/sh/kernel/machine_kexec.c | 3
-rw-r--r--  arch/sh/kernel/setup.c | 3
-rw-r--r--  arch/sh/mm/Kconfig | 3
-rw-r--r--  arch/sh/mm/init.c | 3
-rw-r--r--  arch/sparc/Kconfig | 4
-rw-r--r--  arch/sparc/include/asm/memblock.h | 8
-rw-r--r--  arch/sparc/mm/init_64.c | 32
-rw-r--r--  arch/unicore32/kernel/setup.c | 1
-rw-r--r--  arch/unicore32/mm/init.c | 4
-rw-r--r--  arch/unicore32/mm/mmu.c | 1
-rw-r--r--  arch/x86/Kconfig | 5
-rw-r--r--  arch/x86/include/asm/e820.h | 2
-rw-r--r--  arch/x86/include/asm/memblock.h | 23
-rw-r--r--  arch/x86/kernel/aperture_64.c | 4
-rw-r--r--  arch/x86/kernel/check.c | 34
-rw-r--r--  arch/x86/kernel/e820.c | 58
-rw-r--r--  arch/x86/kernel/head.c | 2
-rw-r--r--  arch/x86/kernel/head32.c | 7
-rw-r--r--  arch/x86/kernel/head64.c | 7
-rw-r--r--  arch/x86/kernel/mpparse.c | 12
-rw-r--r--  arch/x86/kernel/setup.c | 21
-rw-r--r--  arch/x86/kernel/trampoline.c | 4
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/init.c | 8
-rw-r--r--  arch/x86/mm/init_32.c | 36
-rw-r--r--  arch/x86/mm/init_64.c | 2
-rw-r--r--  arch/x86/mm/memblock.c | 348
-rw-r--r--  arch/x86/mm/memtest.c | 33
-rw-r--r--  arch/x86/mm/numa.c | 37
-rw-r--r--  arch/x86/mm/numa_32.c | 10
-rw-r--r--  arch/x86/mm/numa_64.c | 2
-rw-r--r--  arch/x86/mm/numa_emulation.c | 36
-rw-r--r--  arch/x86/platform/efi/efi.c | 9
-rw-r--r--  arch/x86/xen/enlighten.c | 2
-rw-r--r--  arch/x86/xen/mmu.c | 12
-rw-r--r--  arch/x86/xen/setup.c | 7
-rw-r--r--  drivers/iommu/intel-iommu.c | 25
-rw-r--r--  include/linux/bootmem.h | 2
-rw-r--r--  include/linux/memblock.h | 170
-rw-r--r--  include/linux/mm.h | 34
-rw-r--r--  include/linux/mmzone.h | 8
-rw-r--r--  include/linux/poison.h | 6
-rw-r--r--  kernel/printk.c | 2
-rw-r--r--  mm/Kconfig | 6
-rw-r--r--  mm/memblock.c | 961
-rw-r--r--  mm/nobootmem.c | 45
-rw-r--r--  mm/page_alloc.c | 508
75 files changed, 1032 insertions, 1747 deletions
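
In short, the series drops memblock_init() and memblock_analyze(): memblock is now statically initialized, memblock_allow_resize() replaces the old "analyze" step, and add_active_range() gives way to memblock_add_node()/memblock_set_node() plus the generic memblock iterators. As an illustrative sketch only (not part of the patch; the function and parameter names below are hypothetical), the boot-time sequence an architecture converges on after this series looks roughly like:

#include <linux/memblock.h>

/* Hypothetical arch setup helper -- a sketch of the post-series API use. */
static void __init example_arch_memblock_init(phys_addr_t ram_base,
					      phys_addr_t ram_size,
					      phys_addr_t kernel_base,
					      phys_addr_t kernel_size)
{
	/* no memblock_init() any more: memblock is statically initialized */
	memblock_add_node(ram_base, ram_size, 0);	/* was add_active_range() */
	memblock_reserve(kernel_base, kernel_size);	/* protect the kernel image */

	memblock_allow_resize();			/* was memblock_analyze() */
	memblock_dump_all();
}
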
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8fc2c8fcbdc6..c0b59bff6be6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -52,6 +52,7 @@
52#include <asm/mach/time.h> 52#include <asm/mach/time.h>
53#include <asm/traps.h> 53#include <asm/traps.h>
54#include <asm/unwind.h> 54#include <asm/unwind.h>
55#include <asm/memblock.h>
55 56
56#if defined(CONFIG_DEPRECATED_PARAM_STRUCT) 57#if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
57#include "compat.h" 58#include "compat.h"
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index fbdd12ea3a58..7c38474e533a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -32,6 +32,7 @@
32 32
33#include <asm/mach/arch.h> 33#include <asm/mach/arch.h>
34#include <asm/mach/map.h> 34#include <asm/mach/map.h>
35#include <asm/memblock.h>
35 36
36#include "mm.h" 37#include "mm.h"
37 38
@@ -332,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
332 333
333 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); 334 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
334 335
335 memblock_init();
336 for (i = 0; i < mi->nr_banks; i++) 336 for (i = 0; i < mi->nr_banks; i++)
337 memblock_add(mi->bank[i].start, mi->bank[i].size); 337 memblock_add(mi->bank[i].start, mi->bank[i].size);
338 338
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
371 if (mdesc->reserve) 371 if (mdesc->reserve)
372 mdesc->reserve(); 372 mdesc->reserve();
373 373
374 memblock_analyze(); 374 memblock_allow_resize();
375 memblock_dump_all(); 375 memblock_dump_all();
376} 376}
377 377
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27489b6dd533..3b7a7c483785 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -23,6 +23,9 @@ config IA64
23 select HAVE_ARCH_TRACEHOOK 23 select HAVE_ARCH_TRACEHOOK
24 select HAVE_DMA_API_DEBUG 24 select HAVE_DMA_API_DEBUG
25 select HAVE_GENERIC_HARDIRQS 25 select HAVE_GENERIC_HARDIRQS
26 select HAVE_MEMBLOCK
27 select HAVE_MEMBLOCK_NODE_MAP
28 select ARCH_DISCARD_MEMBLOCK
26 select GENERIC_IRQ_PROBE 29 select GENERIC_IRQ_PROBE
27 select GENERIC_PENDING_IRQ if SMP 30 select GENERIC_PENDING_IRQ if SMP
28 select IRQ_PER_CPU 31 select IRQ_PER_CPU
@@ -474,9 +477,6 @@ config NODES_SHIFT
474 MAX_NUMNODES will be 2^(This value). 477 MAX_NUMNODES will be 2^(This value).
475 If in doubt, use the default. 478 If in doubt, use the default.
476 479
477config ARCH_POPULATES_NODE_MAP
478 def_bool y
479
480# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. 480# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
481# VIRTUAL_MEM_MAP has been retained for historical reasons. 481# VIRTUAL_MEM_MAP has been retained for historical reasons.
482config VIRTUAL_MEM_MAP 482config VIRTUAL_MEM_MAP
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index f114a3b14c6a..1516d1dc11fd 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -16,6 +16,7 @@
16 */ 16 */
17#include <linux/bootmem.h> 17#include <linux/bootmem.h>
18#include <linux/efi.h> 18#include <linux/efi.h>
19#include <linux/memblock.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/nmi.h> 21#include <linux/nmi.h>
21#include <linux/swap.h> 22#include <linux/swap.h>
@@ -348,7 +349,7 @@ paging_init (void)
348 printk("Virtual mem_map starts at 0x%p\n", mem_map); 349 printk("Virtual mem_map starts at 0x%p\n", mem_map);
349 } 350 }
350#else /* !CONFIG_VIRTUAL_MEM_MAP */ 351#else /* !CONFIG_VIRTUAL_MEM_MAP */
351 add_active_range(0, 0, max_low_pfn); 352 memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
352 free_area_init_nodes(max_zone_pfns); 353 free_area_init_nodes(max_zone_pfns);
353#endif /* !CONFIG_VIRTUAL_MEM_MAP */ 354#endif /* !CONFIG_VIRTUAL_MEM_MAP */
354 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); 355 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 00cb0e26c64e..13df239dbed1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -10,6 +10,7 @@
10#include <linux/bootmem.h> 10#include <linux/bootmem.h>
11#include <linux/efi.h> 11#include <linux/efi.h>
12#include <linux/elf.h> 12#include <linux/elf.h>
13#include <linux/memblock.h>
13#include <linux/mm.h> 14#include <linux/mm.h>
14#include <linux/mmzone.h> 15#include <linux/mmzone.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -557,8 +558,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
557#endif 558#endif
558 559
559 if (start < end) 560 if (start < end)
560 add_active_range(nid, __pa(start) >> PAGE_SHIFT, 561 memblock_add_node(__pa(start), end - start, nid);
561 __pa(end) >> PAGE_SHIFT);
562 return 0; 562 return 0;
563} 563}
564 564
diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h
deleted file mode 100644
index 20a8e257c77f..000000000000
--- a/arch/microblaze/include/asm/memblock.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 */
8
9#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
10#define _ASM_MICROBLAZE_MEMBLOCK_H
11
12#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
13
14
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 977484add216..80d314e81901 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -122,7 +122,6 @@ void __init early_init_devtree(void *params)
122 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); 122 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
123 123
124 /* Scan memory nodes and rebuild MEMBLOCKs */ 124 /* Scan memory nodes and rebuild MEMBLOCKs */
125 memblock_init();
126 of_scan_flat_dt(early_init_dt_scan_root, NULL); 125 of_scan_flat_dt(early_init_dt_scan_root, NULL);
127 of_scan_flat_dt(early_init_dt_scan_memory, NULL); 126 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
128 127
@@ -130,7 +129,7 @@ void __init early_init_devtree(void *params)
130 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); 129 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
131 parse_early_param(); 130 parse_early_param();
132 131
133 memblock_analyze(); 132 memblock_allow_resize();
134 133
135 pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size()); 134 pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
136 135
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da18a3c..9c652eb68aaa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -25,6 +25,9 @@ config MIPS
25 select GENERIC_IRQ_SHOW 25 select GENERIC_IRQ_SHOW
26 select HAVE_ARCH_JUMP_LABEL 26 select HAVE_ARCH_JUMP_LABEL
27 select IRQ_FORCED_THREADING 27 select IRQ_FORCED_THREADING
28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
28 31
29menu "Machine selection" 32menu "Machine selection"
30 33
@@ -2064,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE
2064 or have huge holes in the physical address space for other reasons. 2067 or have huge holes in the physical address space for other reasons.
2065 See <file:Documentation/vm/numa> for more. 2068 See <file:Documentation/vm/numa> for more.
2066 2069
2067config ARCH_POPULATES_NODE_MAP
2068 def_bool y
2069
2070config ARCH_SPARSEMEM_ENABLE 2070config ARCH_SPARSEMEM_ENABLE
2071 bool 2071 bool
2072 select SPARSEMEM_STATIC 2072 select SPARSEMEM_STATIC
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 84af26ab2212..b1cb8f87d7b4 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -14,6 +14,7 @@
14#include <linux/ioport.h> 14#include <linux/ioport.h>
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/screen_info.h> 16#include <linux/screen_info.h>
17#include <linux/memblock.h>
17#include <linux/bootmem.h> 18#include <linux/bootmem.h>
18#include <linux/initrd.h> 19#include <linux/initrd.h>
19#include <linux/root_dev.h> 20#include <linux/root_dev.h>
@@ -352,7 +353,7 @@ static void __init bootmem_init(void)
352 continue; 353 continue;
353#endif 354#endif
354 355
355 add_active_range(0, start, end); 356 memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
356 } 357 }
357 358
358 /* 359 /*
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index bc1297109cc5..b105eca3c020 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -12,6 +12,7 @@
12 */ 12 */
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/memblock.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/mmzone.h> 17#include <linux/mmzone.h>
17#include <linux/module.h> 18#include <linux/module.h>
@@ -381,8 +382,8 @@ static void __init szmem(void)
381 continue; 382 continue;
382 } 383 }
383 num_physpages += slot_psize; 384 num_physpages += slot_psize;
384 add_active_range(node, slot_getbasepfn(node, slot), 385 memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
385 slot_getbasepfn(node, slot) + slot_psize); 386 PFN_PHYS(slot_psize), node);
386 } 387 }
387 } 388 }
388} 389}
diff --git a/arch/openrisc/include/asm/memblock.h b/arch/openrisc/include/asm/memblock.h
deleted file mode 100644
index bbe5a1c788cb..000000000000
--- a/arch/openrisc/include/asm/memblock.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * OpenRISC Linux
3 *
4 * Linux architectural port borrowing liberally from similar works of
5 * others. All original copyrights apply as per the original source
6 * declaration.
7 *
8 * OpenRISC implementation:
9 * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
10 * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
11 * et al.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 */
18
19#ifndef __ASM_OPENRISC_MEMBLOCK_H
20#define __ASM_OPENRISC_MEMBLOCK_H
21
22/* empty */
23
24#endif /* __ASM_OPENRISC_MEMBLOCK_H */
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 1bb58ba89afa..3d4478f6c942 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -76,14 +76,13 @@ void __init early_init_devtree(void *params)
76 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); 76 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
77 77
78 /* Scan memory nodes and rebuild MEMBLOCKs */ 78 /* Scan memory nodes and rebuild MEMBLOCKs */
79 memblock_init();
80 of_scan_flat_dt(early_init_dt_scan_root, NULL); 79 of_scan_flat_dt(early_init_dt_scan_root, NULL);
81 of_scan_flat_dt(early_init_dt_scan_memory, NULL); 80 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
82 81
83 /* Save command line for /proc/cmdline and then parse parameters */ 82 /* Save command line for /proc/cmdline and then parse parameters */
84 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); 83 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
85 84
86 memblock_analyze(); 85 memblock_allow_resize();
87 86
88 /* We must copy the flattend device tree from init memory to regular 87 /* We must copy the flattend device tree from init memory to regular
89 * memory because the device tree references the strings in it 88 * memory because the device tree references the strings in it
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f5335b..ead0bc68439d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -117,6 +117,7 @@ config PPC
117 select HAVE_KRETPROBES 117 select HAVE_KRETPROBES
118 select HAVE_ARCH_TRACEHOOK 118 select HAVE_ARCH_TRACEHOOK
119 select HAVE_MEMBLOCK 119 select HAVE_MEMBLOCK
120 select HAVE_MEMBLOCK_NODE_MAP
120 select HAVE_DMA_ATTRS 121 select HAVE_DMA_ATTRS
121 select HAVE_DMA_API_DEBUG 122 select HAVE_DMA_API_DEBUG
122 select USE_GENERIC_SMP_HELPERS if SMP 123 select USE_GENERIC_SMP_HELPERS if SMP
@@ -421,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT
421 def_bool y 422 def_bool y
422 depends on (SMP && PPC_PSERIES) || PPC_PS3 423 depends on (SMP && PPC_PSERIES) || PPC_PS3
423 424
424config ARCH_POPULATES_NODE_MAP
425 def_bool y
426
427config SYS_SUPPORTS_HUGETLBFS 425config SYS_SUPPORTS_HUGETLBFS
428 bool 426 bool
429 427
diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h
deleted file mode 100644
index 43efc345065e..000000000000
--- a/arch/powerpc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _ASM_POWERPC_MEMBLOCK_H
2#define _ASM_POWERPC_MEMBLOCK_H
3
4#include <asm/udbg.h>
5
6#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
7
8#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672afb59..a2158a395d96 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
107 unsigned long long crash_size, crash_base; 107 unsigned long long crash_size, crash_base;
108 int ret; 108 int ret;
109 109
110 /* this is necessary because of memblock_phys_mem_size() */
111 memblock_analyze();
112
113 /* use common parsing */ 110 /* use common parsing */
114 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 111 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
115 &crash_size, &crash_base); 112 &crash_size, &crash_base);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b0503b..abe405dab34d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
733 of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); 733 of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
734 734
735 /* Scan memory nodes and rebuild MEMBLOCKs */ 735 /* Scan memory nodes and rebuild MEMBLOCKs */
736 memblock_init();
737
738 of_scan_flat_dt(early_init_dt_scan_root, NULL); 736 of_scan_flat_dt(early_init_dt_scan_root, NULL);
739 of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); 737 of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
740 738
@@ -756,20 +754,14 @@ void __init early_init_devtree(void *params)
756 early_reserve_mem(); 754 early_reserve_mem();
757 phyp_dump_reserve_mem(); 755 phyp_dump_reserve_mem();
758 756
759 limit = memory_limit; 757 /*
760 if (! limit) { 758 * Ensure that total memory size is page-aligned, because otherwise
761 phys_addr_t memsize; 759 * mark_bootmem() gets upset.
762 760 */
763 /* Ensure that total memory size is page-aligned, because 761 limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
764 * otherwise mark_bootmem() gets upset. */
765 memblock_analyze();
766 memsize = memblock_phys_mem_size();
767 if ((memsize & PAGE_MASK) != memsize)
768 limit = memsize & PAGE_MASK;
769 }
770 memblock_enforce_memory_limit(limit); 762 memblock_enforce_memory_limit(limit);
771 763
772 memblock_analyze(); 764 memblock_allow_resize();
773 memblock_dump_all(); 765 memblock_dump_all();
774 766
775 DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); 767 DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 161cefde5c15..58861fa1220e 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -134,8 +134,7 @@ void __init MMU_init(void)
134 134
135 if (memblock.memory.cnt > 1) { 135 if (memblock.memory.cnt > 1) {
136#ifndef CONFIG_WII 136#ifndef CONFIG_WII
137 memblock.memory.cnt = 1; 137 memblock_enforce_memory_limit(memblock.memory.regions[0].size);
138 memblock_analyze();
139 printk(KERN_WARNING "Only using first contiguous memory region"); 138 printk(KERN_WARNING "Only using first contiguous memory region");
140#else 139#else
141 wii_memory_fixups(); 140 wii_memory_fixups();
@@ -158,7 +157,6 @@ void __init MMU_init(void)
158#ifndef CONFIG_HIGHMEM 157#ifndef CONFIG_HIGHMEM
159 total_memory = total_lowmem; 158 total_memory = total_lowmem;
160 memblock_enforce_memory_limit(total_lowmem); 159 memblock_enforce_memory_limit(total_lowmem);
161 memblock_analyze();
162#endif /* CONFIG_HIGHMEM */ 160#endif /* CONFIG_HIGHMEM */
163 } 161 }
164 162
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2dd6bdd31fe1..8e2eb6611b0b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -199,7 +199,7 @@ void __init do_init_bootmem(void)
199 unsigned long start_pfn, end_pfn; 199 unsigned long start_pfn, end_pfn;
200 start_pfn = memblock_region_memory_base_pfn(reg); 200 start_pfn = memblock_region_memory_base_pfn(reg);
201 end_pfn = memblock_region_memory_end_pfn(reg); 201 end_pfn = memblock_region_memory_end_pfn(reg);
202 add_active_range(0, start_pfn, end_pfn); 202 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
203 } 203 }
204 204
205 /* Add all physical memory to the bootmem map, mark each area 205 /* Add all physical memory to the bootmem map, mark each area
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22a83a91cb8..e6eea0ac80c8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
127} 127}
128 128
129/* 129/*
130 * get_active_region_work_fn - A helper function for get_node_active_region 130 * get_node_active_region - Return active region containing pfn
131 * Returns datax set to the start_pfn and end_pfn if they contain
132 * the initial value of datax->start_pfn between them
133 * @start_pfn: start page(inclusive) of region to check
134 * @end_pfn: end page(exclusive) of region to check
135 * @datax: comes in with ->start_pfn set to value to search for and
136 * goes out with active range if it contains it
137 * Returns 1 if search value is in range else 0
138 */
139static int __init get_active_region_work_fn(unsigned long start_pfn,
140 unsigned long end_pfn, void *datax)
141{
142 struct node_active_region *data;
143 data = (struct node_active_region *)datax;
144
145 if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
146 data->start_pfn = start_pfn;
147 data->end_pfn = end_pfn;
148 return 1;
149 }
150 return 0;
151
152}
153
154/*
155 * get_node_active_region - Return active region containing start_pfn
156 * Active range returned is empty if none found. 131 * Active range returned is empty if none found.
157 * @start_pfn: The page to return the region for. 132 * @pfn: The page to return the region for
158 * @node_ar: Returned set to the active region containing start_pfn 133 * @node_ar: Returned set to the active region containing @pfn
159 */ 134 */
160static void __init get_node_active_region(unsigned long start_pfn, 135static void __init get_node_active_region(unsigned long pfn,
161 struct node_active_region *node_ar) 136 struct node_active_region *node_ar)
162{ 137{
163 int nid = early_pfn_to_nid(start_pfn); 138 unsigned long start_pfn, end_pfn;
139 int i, nid;
164 140
165 node_ar->nid = nid; 141 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
166 node_ar->start_pfn = start_pfn; 142 if (pfn >= start_pfn && pfn < end_pfn) {
167 node_ar->end_pfn = start_pfn; 143 node_ar->nid = nid;
168 work_with_active_regions(nid, get_active_region_work_fn, node_ar); 144 node_ar->start_pfn = start_pfn;
145 node_ar->end_pfn = end_pfn;
146 break;
147 }
148 }
169} 149}
170 150
171static void map_cpu_to_node(int cpu, int node) 151static void map_cpu_to_node(int cpu, int node)
@@ -710,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
710 node_set_online(nid); 690 node_set_online(nid);
711 sz = numa_enforce_memory_limit(base, size); 691 sz = numa_enforce_memory_limit(base, size);
712 if (sz) 692 if (sz)
713 add_active_range(nid, base >> PAGE_SHIFT, 693 memblock_set_node(base, sz, nid);
714 (base >> PAGE_SHIFT)
715 + (sz >> PAGE_SHIFT));
716 } while (--ranges); 694 } while (--ranges);
717 } 695 }
718} 696}
@@ -802,8 +780,7 @@ new_range:
802 continue; 780 continue;
803 } 781 }
804 782
805 add_active_range(nid, start >> PAGE_SHIFT, 783 memblock_set_node(start, size, nid);
806 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
807 784
808 if (--ranges) 785 if (--ranges)
809 goto new_range; 786 goto new_range;
@@ -839,7 +816,8 @@ static void __init setup_nonnuma(void)
839 end_pfn = memblock_region_memory_end_pfn(reg); 816 end_pfn = memblock_region_memory_end_pfn(reg);
840 817
841 fake_numa_create_new_node(end_pfn, &nid); 818 fake_numa_create_new_node(end_pfn, &nid);
842 add_active_range(nid, start_pfn, end_pfn); 819 memblock_set_node(PFN_PHYS(start_pfn),
820 PFN_PHYS(end_pfn - start_pfn), nid);
843 node_set_online(nid); 821 node_set_online(nid);
844 } 822 }
845} 823}
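
Pulled out of the side-by-side rendering above for readability, the rewritten lookup amounts to the following (a sketch matching the hunk, not an authoritative copy of the commit):

/* Walk the memblock PFN ranges and report the one containing @pfn;
 * the returned range is left untouched if none matches. */
static void __init get_node_active_region(unsigned long pfn,
					  struct node_active_region *node_ar)
{
	unsigned long start_pfn, end_pfn;
	int i, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (pfn >= start_pfn && pfn < end_pfn) {
			node_ar->nid = nid;
			node_ar->start_pfn = start_pfn;
			node_ar->end_pfn = end_pfn;
			break;
		}
	}
}
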
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 4e13d6f9023e..573ba3b69d1f 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu)
615 615
616 /* limit memory so we dont have linear faults */ 616 /* limit memory so we dont have linear faults */
617 memblock_enforce_memory_limit(linear_map_top); 617 memblock_enforce_memory_limit(linear_map_top);
618 memblock_analyze();
619 618
620 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); 619 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
621 patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); 620 patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 1b5dc1a2e145..6d8dadf19f0b 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -79,24 +79,19 @@ void __init wii_memory_fixups(void)
79 BUG_ON(memblock.memory.cnt != 2); 79 BUG_ON(memblock.memory.cnt != 2);
80 BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); 80 BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
81 81
82 p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE); 82 /* trim unaligned tail */
83 p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE); 83 memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
84 (phys_addr_t)ULLONG_MAX);
84 85
85 wii_hole_start = p[0].base + p[0].size; 86 /* determine hole, add & reserve them */
87 wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
86 wii_hole_size = p[1].base - wii_hole_start; 88 wii_hole_size = p[1].base - wii_hole_start;
87 89 memblock_add(wii_hole_start, wii_hole_size);
88 pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size);
89 pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size);
90 pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size);
91
92 p[0].size += wii_hole_size + p[1].size;
93
94 memblock.memory.cnt = 1;
95 memblock_analyze();
96
97 /* reserve the hole */
98 memblock_reserve(wii_hole_start, wii_hole_size); 90 memblock_reserve(wii_hole_start, wii_hole_size);
99 91
92 BUG_ON(memblock.memory.cnt != 1);
93 __memblock_dump_all();
94
100 /* allow ioremapping the address space in the hole */ 95 /* allow ioremapping the address space in the hole */
101 __allow_ioremap_reserved = 1; 96 __allow_ioremap_reserved = 1;
102} 97}
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 72714ad27842..8bd6ba542691 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void)
319 } 319 }
320 320
321 memblock_add(start_addr, map.r1.size); 321 memblock_add(start_addr, map.r1.size);
322 memblock_analyze();
323 322
324 result = online_pages(start_pfn, nr_pages); 323 result = online_pages(start_pfn, nr_pages);
325 324
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b3744a..d48ede334434 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -92,6 +92,9 @@ config S390
92 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 92 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
93 select HAVE_RCU_TABLE_FREE if SMP 93 select HAVE_RCU_TABLE_FREE if SMP
94 select ARCH_SAVE_PAGE_KEYS if HIBERNATION 94 select ARCH_SAVE_PAGE_KEYS if HIBERNATION
95 select HAVE_MEMBLOCK
96 select HAVE_MEMBLOCK_NODE_MAP
97 select ARCH_DISCARD_MEMBLOCK
95 select ARCH_INLINE_SPIN_TRYLOCK 98 select ARCH_INLINE_SPIN_TRYLOCK
96 select ARCH_INLINE_SPIN_TRYLOCK_BH 99 select ARCH_INLINE_SPIN_TRYLOCK_BH
97 select ARCH_INLINE_SPIN_LOCK 100 select ARCH_INLINE_SPIN_LOCK
@@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK
345 348
346 Say N if you are unsure. 349 Say N if you are unsure.
347 350
348config ARCH_POPULATES_NODE_MAP
349 def_bool y
350
351comment "Kernel preemption" 351comment "Kernel preemption"
352 352
353source "kernel/Kconfig.preempt" 353source "kernel/Kconfig.preempt"
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e54c4ff8abaa..f11d1b037c50 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/memblock.h>
24#include <linux/mm.h> 25#include <linux/mm.h>
25#include <linux/stddef.h> 26#include <linux/stddef.h>
26#include <linux/unistd.h> 27#include <linux/unistd.h>
@@ -820,7 +821,8 @@ setup_memory(void)
820 end_chunk = min(end_chunk, end_pfn); 821 end_chunk = min(end_chunk, end_pfn);
821 if (start_chunk >= end_chunk) 822 if (start_chunk >= end_chunk)
822 continue; 823 continue;
823 add_active_range(0, start_chunk, end_chunk); 824 memblock_add_node(PFN_PHYS(start_chunk),
825 PFN_PHYS(end_chunk - start_chunk), 0);
824 pfn = max(start_chunk, start_pfn); 826 pfn = max(start_chunk, start_pfn);
825 for (; pfn < end_chunk; pfn++) 827 for (; pfn < end_chunk; pfn++)
826 page_set_storage_key(PFN_PHYS(pfn), 828 page_set_storage_key(PFN_PHYS(pfn),
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index df169e84db4e..8b0c9464aa9d 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -4,6 +4,9 @@ config SCORE
4 def_bool y 4 def_bool y
5 select HAVE_GENERIC_HARDIRQS 5 select HAVE_GENERIC_HARDIRQS
6 select GENERIC_IRQ_SHOW 6 select GENERIC_IRQ_SHOW
7 select HAVE_MEMBLOCK
8 select HAVE_MEMBLOCK_NODE_MAP
9 select ARCH_DISCARD_MEMBLOCK
7 10
8choice 11choice
9 prompt "System type" 12 prompt "System type"
@@ -60,9 +63,6 @@ config 32BIT
60config ARCH_FLATMEM_ENABLE 63config ARCH_FLATMEM_ENABLE
61 def_bool y 64 def_bool y
62 65
63config ARCH_POPULATES_NODE_MAP
64 def_bool y
65
66source "mm/Kconfig" 66source "mm/Kconfig"
67 67
68config MEMORY_START 68config MEMORY_START
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index 6f898c057878..b48459afefdd 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -26,6 +26,7 @@
26#include <linux/bootmem.h> 26#include <linux/bootmem.h>
27#include <linux/initrd.h> 27#include <linux/initrd.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/memblock.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
30#include <linux/seq_file.h> 31#include <linux/seq_file.h>
31#include <linux/screen_info.h> 32#include <linux/screen_info.h>
@@ -54,7 +55,8 @@ static void __init bootmem_init(void)
54 /* Initialize the boot-time allocator with low memory only. */ 55 /* Initialize the boot-time allocator with low memory only. */
55 bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn, 56 bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
56 min_low_pfn, max_low_pfn); 57 min_low_pfn, max_low_pfn);
57 add_active_range(0, min_low_pfn, max_low_pfn); 58 memblock_add_node(PFN_PHYS(min_low_pfn),
59 PFN_PHYS(max_low_pfn - min_low_pfn), 0);
58 60
59 free_bootmem(PFN_PHYS(start_pfn), 61 free_bootmem(PFN_PHYS(start_pfn),
60 (max_low_pfn - start_pfn) << PAGE_SHIFT); 62 (max_low_pfn - start_pfn) << PAGE_SHIFT);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5629e2099130..47a2f1c2cb0d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -4,6 +4,7 @@ config SUPERH
4 select CLKDEV_LOOKUP 4 select CLKDEV_LOOKUP
5 select HAVE_IDE if HAS_IOPORT 5 select HAVE_IDE if HAS_IOPORT
6 select HAVE_MEMBLOCK 6 select HAVE_MEMBLOCK
7 select HAVE_MEMBLOCK_NODE_MAP
7 select HAVE_OPROFILE 8 select HAVE_OPROFILE
8 select HAVE_GENERIC_DMA_COHERENT 9 select HAVE_GENERIC_DMA_COHERENT
9 select HAVE_ARCH_TRACEHOOK 10 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h
deleted file mode 100644
index e87063fad2ea..000000000000
--- a/arch/sh/include/asm/memblock.h
+++ /dev/null
@@ -1,4 +0,0 @@
1#ifndef __ASM_SH_MEMBLOCK_H
2#define __ASM_SH_MEMBLOCK_H
3
4#endif /* __ASM_SH_MEMBLOCK_H */
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index c5a33f007f88..9fea49f6e667 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -157,9 +157,6 @@ void __init reserve_crashkernel(void)
157 unsigned long long crash_size, crash_base; 157 unsigned long long crash_size, crash_base;
158 int ret; 158 int ret;
159 159
160 /* this is necessary because of memblock_phys_mem_size() */
161 memblock_analyze();
162
163 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 160 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
164 &crash_size, &crash_base); 161 &crash_size, &crash_base);
165 if (ret == 0 && crash_size > 0) { 162 if (ret == 0 && crash_size > 0) {
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 1a0e946679a4..7b57bf1dc855 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -230,7 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
230 pmb_bolt_mapping((unsigned long)__va(start), start, end - start, 230 pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
231 PAGE_KERNEL); 231 PAGE_KERNEL);
232 232
233 add_active_range(nid, start_pfn, end_pfn); 233 memblock_set_node(PFN_PHYS(start_pfn),
234 PFN_PHYS(end_pfn - start_pfn), nid);
234} 235}
235 236
236void __init __weak plat_early_device_setup(void) 237void __init __weak plat_early_device_setup(void)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index c3e61b366493..cb8f9920f4dd 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS
143 CPU_SUBTYPE_SH7785) 143 CPU_SUBTYPE_SH7785)
144 default "1" 144 default "1"
145 145
146config ARCH_POPULATES_NODE_MAP
147 def_bool y
148
149config ARCH_SELECT_MEMORY_MODEL 146config ARCH_SELECT_MEMORY_MODEL
150 def_bool y 147 def_bool y
151 148
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 939ca0f356f6..82cc576fab15 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -324,7 +324,6 @@ void __init paging_init(void)
324 unsigned long vaddr, end; 324 unsigned long vaddr, end;
325 int nid; 325 int nid;
326 326
327 memblock_init();
328 sh_mv.mv_mem_init(); 327 sh_mv.mv_mem_init();
329 328
330 early_reserve_mem(); 329 early_reserve_mem();
@@ -337,7 +336,7 @@ void __init paging_init(void)
337 sh_mv.mv_mem_reserve(); 336 sh_mv.mv_mem_reserve();
338 337
339 memblock_enforce_memory_limit(memory_limit); 338 memblock_enforce_memory_limit(memory_limit);
340 memblock_analyze(); 339 memblock_allow_resize();
341 340
342 memblock_dump_all(); 341 memblock_dump_all();
343 342
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index f92602e86607..70ae9d81870e 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC64
43 select HAVE_KPROBES 43 select HAVE_KPROBES
44 select HAVE_RCU_TABLE_FREE if SMP 44 select HAVE_RCU_TABLE_FREE if SMP
45 select HAVE_MEMBLOCK 45 select HAVE_MEMBLOCK
46 select HAVE_MEMBLOCK_NODE_MAP
46 select HAVE_SYSCALL_WRAPPERS 47 select HAVE_SYSCALL_WRAPPERS
47 select HAVE_DYNAMIC_FTRACE 48 select HAVE_DYNAMIC_FTRACE
48 select HAVE_FTRACE_MCOUNT_RECORD 49 select HAVE_FTRACE_MCOUNT_RECORD
@@ -352,9 +353,6 @@ config NODES_SPAN_OTHER_NODES
352 def_bool y 353 def_bool y
353 depends on NEED_MULTIPLE_NODES 354 depends on NEED_MULTIPLE_NODES
354 355
355config ARCH_POPULATES_NODE_MAP
356 def_bool y if SPARC64
357
358config ARCH_SELECT_MEMORY_MODEL 356config ARCH_SELECT_MEMORY_MODEL
359 def_bool y if SPARC64 357 def_bool y if SPARC64
360 358
diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h
deleted file mode 100644
index c67b047ef85e..000000000000
--- a/arch/sparc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _SPARC64_MEMBLOCK_H
2#define _SPARC64_MEMBLOCK_H
3
4#include <asm/oplib.h>
5
6#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
7
8#endif /* !(_SPARC64_MEMBLOCK_H) */
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8e073d802139..b3f5e7dfea51 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -790,7 +790,7 @@ static int find_node(unsigned long addr)
790 return -1; 790 return -1;
791} 791}
792 792
793u64 memblock_nid_range(u64 start, u64 end, int *nid) 793static u64 memblock_nid_range(u64 start, u64 end, int *nid)
794{ 794{
795 *nid = find_node(start); 795 *nid = find_node(start);
796 start += PAGE_SIZE; 796 start += PAGE_SIZE;
@@ -808,7 +808,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
808 return start; 808 return start;
809} 809}
810#else 810#else
811u64 memblock_nid_range(u64 start, u64 end, int *nid) 811static u64 memblock_nid_range(u64 start, u64 end, int *nid)
812{ 812{
813 *nid = 0; 813 *nid = 0;
814 return end; 814 return end;
@@ -816,7 +816,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
816#endif 816#endif
817 817
818/* This must be invoked after performing all of the necessary 818/* This must be invoked after performing all of the necessary
819 * add_active_range() calls for 'nid'. We need to be able to get 819 * memblock_set_node() calls for 'nid'. We need to be able to get
820 * correct data from get_pfn_range_for_nid(). 820 * correct data from get_pfn_range_for_nid().
821 */ 821 */
822static void __init allocate_node_data(int nid) 822static void __init allocate_node_data(int nid)
@@ -987,14 +987,11 @@ static void __init add_node_ranges(void)
987 987
988 this_end = memblock_nid_range(start, end, &nid); 988 this_end = memblock_nid_range(start, end, &nid);
989 989
990 numadbg("Adding active range nid[%d] " 990 numadbg("Setting memblock NUMA node nid[%d] "
991 "start[%lx] end[%lx]\n", 991 "start[%lx] end[%lx]\n",
992 nid, start, this_end); 992 nid, start, this_end);
993 993
994 add_active_range(nid, 994 memblock_set_node(start, this_end - start, nid);
995 start >> PAGE_SHIFT,
996 this_end >> PAGE_SHIFT);
997
998 start = this_end; 995 start = this_end;
999 } 996 }
1000 } 997 }
@@ -1282,7 +1279,6 @@ static void __init bootmem_init_nonnuma(void)
1282{ 1279{
1283 unsigned long top_of_ram = memblock_end_of_DRAM(); 1280 unsigned long top_of_ram = memblock_end_of_DRAM();
1284 unsigned long total_ram = memblock_phys_mem_size(); 1281 unsigned long total_ram = memblock_phys_mem_size();
1285 struct memblock_region *reg;
1286 1282
1287 numadbg("bootmem_init_nonnuma()\n"); 1283 numadbg("bootmem_init_nonnuma()\n");
1288 1284
@@ -1292,20 +1288,8 @@ static void __init bootmem_init_nonnuma(void)
1292 (top_of_ram - total_ram) >> 20); 1288 (top_of_ram - total_ram) >> 20);
1293 1289
1294 init_node_masks_nonnuma(); 1290 init_node_masks_nonnuma();
1295 1291 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
1296 for_each_memblock(memory, reg) {
1297 unsigned long start_pfn, end_pfn;
1298
1299 if (!reg->size)
1300 continue;
1301
1302 start_pfn = memblock_region_memory_base_pfn(reg);
1303 end_pfn = memblock_region_memory_end_pfn(reg);
1304 add_active_range(0, start_pfn, end_pfn);
1305 }
1306
1307 allocate_node_data(0); 1292 allocate_node_data(0);
1308
1309 node_set_online(0); 1293 node_set_online(0);
1310} 1294}
1311 1295
@@ -1769,8 +1753,6 @@ void __init paging_init(void)
1769 sun4v_ktsb_init(); 1753 sun4v_ktsb_init();
1770 } 1754 }
1771 1755
1772 memblock_init();
1773
1774 /* Find available physical memory... 1756 /* Find available physical memory...
1775 * 1757 *
1776 * Read it twice in order to work around a bug in openfirmware. 1758 * Read it twice in order to work around a bug in openfirmware.
@@ -1796,7 +1778,7 @@ void __init paging_init(void)
1796 1778
1797 memblock_enforce_memory_limit(cmdline_memory_size); 1779 memblock_enforce_memory_limit(cmdline_memory_size);
1798 1780
1799 memblock_analyze(); 1781 memblock_allow_resize();
1800 memblock_dump_all(); 1782 memblock_dump_all();
1801 1783
1802 set_bit(0, mmu_context_bmap); 1784 set_bit(0, mmu_context_bmap);
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 471b6bca8da4..673d7a89d8ff 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -37,6 +37,7 @@
37#include <asm/cacheflush.h> 37#include <asm/cacheflush.h>
38#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
39#include <asm/traps.h> 39#include <asm/traps.h>
40#include <asm/memblock.h>
40 41
41#include "setup.h" 42#include "setup.h"
42 43
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 3b379cddbc64..de186bde8975 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -26,6 +26,7 @@
26#include <asm/setup.h> 26#include <asm/setup.h>
27#include <asm/sizes.h> 27#include <asm/sizes.h>
28#include <asm/tlb.h> 28#include <asm/tlb.h>
29#include <asm/memblock.h>
29#include <mach/map.h> 30#include <mach/map.h>
30 31
31#include "mm.h" 32#include "mm.h"
@@ -245,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi)
245 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), 246 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
246 meminfo_cmp, NULL); 247 meminfo_cmp, NULL);
247 248
248 memblock_init();
249 for (i = 0; i < mi->nr_banks; i++) 249 for (i = 0; i < mi->nr_banks; i++)
250 memblock_add(mi->bank[i].start, mi->bank[i].size); 250 memblock_add(mi->bank[i].start, mi->bank[i].size);
251 251
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
264 264
265 uc32_mm_memblock_reserve(); 265 uc32_mm_memblock_reserve();
266 266
267 memblock_analyze(); 267 memblock_allow_resize();
268 memblock_dump_all(); 268 memblock_dump_all();
269} 269}
270 270
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 3e5c3e5a0b45..43c20b40e444 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -25,6 +25,7 @@
25#include <asm/setup.h> 25#include <asm/setup.h>
26#include <asm/sizes.h> 26#include <asm/sizes.h>
27#include <asm/tlb.h> 27#include <asm/tlb.h>
28#include <asm/memblock.h>
28 29
29#include <mach/map.h> 30#include <mach/map.h>
30 31
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..67d6af3581bc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
28 select HAVE_MEMBLOCK 28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
29 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
30 select ARCH_WANT_FRAME_POINTERS 32 select ARCH_WANT_FRAME_POINTERS
31 select HAVE_DMA_ATTRS 33 select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
204 bool 206 bool
205 default X86_64 207 default X86_64
206 208
207config ARCH_POPULATES_NODE_MAP
208 def_bool y
209
210config AUDIT_ARCH 209config AUDIT_ARCH
211 bool 210 bool
212 default X86_64 211 default X86_64
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d88..37782566af24 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
117 117
118extern unsigned long e820_end_of_ram_pfn(void); 118extern unsigned long e820_end_of_ram_pfn(void);
119extern unsigned long e820_end_of_low_ram_pfn(void); 119extern unsigned long e820_end_of_low_ram_pfn(void);
120extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 120extern u64 early_reserve_e820(u64 sizet, u64 align);
121 121
122void memblock_x86_fill(void); 122void memblock_x86_fill(void);
123void memblock_find_dma_reserve(void); 123void memblock_find_dma_reserve(void);
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index 0cd3800f33b9..000000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _X86_MEMBLOCK_H
2#define _X86_MEMBLOCK_H
3
4#define ARCH_DISCARD_MEMBLOCK
5
6u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
7
8void memblock_x86_reserve_range(u64 start, u64 end, char *name);
9void memblock_x86_free_range(u64 start, u64 end);
10struct range;
11int __get_free_all_memory_range(struct range **range, int nodeid,
12 unsigned long start_pfn, unsigned long end_pfn);
13int get_free_all_memory_range(struct range **rangep, int nodeid);
14
15void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
16 unsigned long last_pfn);
17u64 memblock_x86_hole_size(u64 start, u64 end);
18u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
19u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
20u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
21bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
22
23#endif
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
88 */ 88 */
89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
90 aper_size, aper_size); 90 aper_size, aper_size);
91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { 91 if (!addr || addr + aper_size > GART_MAX_ADDR) {
92 printk(KERN_ERR 92 printk(KERN_ERR
93 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
94 addr, aper_size>>10); 94 addr, aper_size>>10);
95 return 0; 95 return 0;
96 } 96 }
97 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); 97 memblock_reserve(addr, aper_size);
98 /* 98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the 99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping. 100 * kernel direct mapping.
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
62 62
63void __init setup_bios_corruption_check(void) 63void __init setup_bios_corruption_check(void)
64{ 64{
65 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ 65 phys_addr_t start, end;
66 u64 i;
66 67
67 if (memory_corruption_check == -1) { 68 if (memory_corruption_check == -1) {
68 memory_corruption_check = 69 memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
82 83
83 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 84 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
84 85
85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 86 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
86 u64 size; 87 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); 88 PAGE_SIZE, corruption_check_size);
89 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
90 PAGE_SIZE, corruption_check_size);
91 if (start >= end)
92 continue;
88 93
89 if (addr == MEMBLOCK_ERROR) 94 memblock_reserve(start, end - start);
90 break; 95 scan_areas[num_scan_areas].addr = start;
91 96 scan_areas[num_scan_areas].size = end - start;
92 if (addr >= corruption_check_size)
93 break;
94
95 if ((addr + size) > corruption_check_size)
96 size = corruption_check_size - addr;
97
98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
99 scan_areas[num_scan_areas].addr = addr;
100 scan_areas[num_scan_areas].size = size;
101 num_scan_areas++;
102 97
103 /* Assume we've already mapped this early memory */ 98 /* Assume we've already mapped this early memory */
104 memset(__va(addr), 0, size); 99 memset(__va(start), 0, end - start);
105 100
106 addr += size; 101 if (++num_scan_areas >= MAX_SCAN_AREAS)
102 break;
107 } 103 }
108 104
109 if (num_scan_areas) 105 if (num_scan_areas)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
738/* 738/*
739 * pre allocated 4k and reserved it in memblock and e820_saved 739 * pre allocated 4k and reserved it in memblock and e820_saved
740 */ 740 */
741u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 741u64 __init early_reserve_e820(u64 size, u64 align)
742{ 742{
743 u64 size = 0;
744 u64 addr; 743 u64 addr;
745 u64 start;
746 744
747 for (start = startt; ; start += size) { 745 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
748 start = memblock_x86_find_in_range_size(start, &size, align); 746 if (addr) {
749 if (start == MEMBLOCK_ERROR) 747 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
750 return 0; 748 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
751 if (size >= sizet) 749 update_e820_saved();
752 break;
753 } 750 }
754 751
755#ifdef CONFIG_X86_32
756 if (start >= MAXMEM)
757 return 0;
758 if (start + size > MAXMEM)
759 size = MAXMEM - start;
760#endif
761
762 addr = round_down(start + size - sizet, align);
763 if (addr < start)
764 return 0;
765 memblock_x86_reserve_range(addr, addr + sizet, "new next");
766 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
767 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
768 update_e820_saved();
769
770 return addr; 752 return addr;
771} 753}
772 754
@@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void)
1090 * We are safe to enable resizing, beause memblock_x86_fill() 1072 * We are safe to enable resizing, beause memblock_x86_fill()
1091 * is rather later for x86 1073 * is rather later for x86
1092 */ 1074 */
1093 memblock_can_resize = 1; 1075 memblock_allow_resize();
1094 1076
1095 for (i = 0; i < e820.nr_map; i++) { 1077 for (i = 0; i < e820.nr_map; i++) {
1096 struct e820entry *ei = &e820.map[i]; 1078 struct e820entry *ei = &e820.map[i];
@@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void)
1105 memblock_add(ei->addr, ei->size); 1087 memblock_add(ei->addr, ei->size);
1106 } 1088 }
1107 1089
1108 memblock_analyze();
1109 memblock_dump_all(); 1090 memblock_dump_all();
1110} 1091}
1111 1092
1112void __init memblock_find_dma_reserve(void) 1093void __init memblock_find_dma_reserve(void)
1113{ 1094{
1114#ifdef CONFIG_X86_64 1095#ifdef CONFIG_X86_64
1115 u64 free_size_pfn; 1096 u64 nr_pages = 0, nr_free_pages = 0;
1116 u64 mem_size_pfn; 1097 unsigned long start_pfn, end_pfn;
1098 phys_addr_t start, end;
1099 int i;
1100 u64 u;
1101
1117 /* 1102 /*
1118 * need to find out used area below MAX_DMA_PFN 1103 * need to find out used area below MAX_DMA_PFN
1119 * need to use memblock to get free size in [0, MAX_DMA_PFN] 1104 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1120 * at first, and assume boot_mem will not take below MAX_DMA_PFN 1105 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1121 */ 1106 */
1122 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
1123 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1108 start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
1124 set_dma_reserve(mem_size_pfn - free_size_pfn); 1109 end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
1110 nr_pages += end_pfn - start_pfn;
1111 }
1112
1113 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
1114 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1115 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1116 if (start_pfn < end_pfn)
1117 nr_free_pages += end_pfn - start_pfn;
1118 }
1119
1120 set_dma_reserve(nr_pages - nr_free_pages);
1125#endif 1121#endif
1126} 1122}
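
The rewritten memblock_find_dma_reserve() above relies on the new generic iterators. A minimal sketch of the same pattern, assuming a hypothetical limit_pfn parameter (illustrative only, not code from the patch):

/* Count pages that are present but not free below limit_pfn, using
 * for_each_mem_pfn_range() for present memory and
 * for_each_free_mem_range() for free memory. */
static unsigned long __init pages_reserved_below(unsigned long limit_pfn)
{
	unsigned long start_pfn, end_pfn, nr_pages = 0, nr_free = 0;
	phys_addr_t start, end;
	int i;
	u64 u;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
		start_pfn = min(start_pfn, limit_pfn);
		end_pfn = min(end_pfn, limit_pfn);
		nr_pages += end_pfn - start_pfn;
	}

	for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
		start_pfn = min_t(unsigned long, PFN_UP(start), limit_pfn);
		end_pfn = min_t(unsigned long, PFN_DOWN(end), limit_pfn);
		if (start_pfn < end_pfn)
			nr_free += end_pfn - start_pfn;
	}

	return nr_pages - nr_free;
}
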
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
52 lowmem = 0x9f000; 52 lowmem = 0x9f000;
53 53
54 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); 55 memblock_reserve(lowmem, 0x100000 - lowmem);
56} 56}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
31 31
32void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
33{ 33{
34 memblock_init(); 34 memblock_reserve(__pa_symbol(&_text),
35 35 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
36 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
37 36
38#ifdef CONFIG_BLK_DEV_INITRD 37#ifdef CONFIG_BLK_DEV_INITRD
39 /* Reserve INITRD */ 38 /* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
42 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 41 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
43 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 42 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
44 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 43 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
45 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 44 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
46 } 45 }
47#endif 46#endif
48 47
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 98{
99 copy_bootdata(__va(real_mode_data)); 99 copy_bootdata(__va(real_mode_data));
100 100
101 memblock_init(); 101 memblock_reserve(__pa_symbol(&_text),
102 102 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
103 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 103
105#ifdef CONFIG_BLK_DEV_INITRD 104#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 105 /* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
109 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 108 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
110 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 109 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
111 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 110 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
112 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 111 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
113 } 112 }
114#endif 113#endif
115 114
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0741b062a304..ca470e4c92dc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
564 564
565static void __init smp_reserve_memory(struct mpf_intel *mpf) 565static void __init smp_reserve_memory(struct mpf_intel *mpf)
566{ 566{
567 unsigned long size = get_mpc_size(mpf->physptr); 567 memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
568
569 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
570} 568}
571 569
572static int __init smp_scan_config(unsigned long base, unsigned long length) 570static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
595 mpf, (u64)virt_to_phys(mpf)); 593 mpf, (u64)virt_to_phys(mpf));
596 594
597 mem = virt_to_phys(mpf); 595 mem = virt_to_phys(mpf);
598 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); 596 memblock_reserve(mem, sizeof(*mpf));
599 if (mpf->physptr) 597 if (mpf->physptr)
600 smp_reserve_memory(mpf); 598 smp_reserve_memory(mpf);
601 599
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
836 834
837void __init early_reserve_e820_mpc_new(void) 835void __init early_reserve_e820_mpc_new(void)
838{ 836{
839 if (enable_update_mptable && alloc_mptable) { 837 if (enable_update_mptable && alloc_mptable)
840 u64 startt = 0; 838 mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
841 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
842 }
843} 839}
844 840
845static int __init update_mp_table(void) 841static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6d..d05444ac2aea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
306static void __init reserve_brk(void) 306static void __init reserve_brk(void)
307{ 307{
308 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
309 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); 309 memblock_reserve(__pa(_brk_start),
310 __pa(_brk_end) - __pa(_brk_start));
310 311
311 /* Mark brk area as locked down and no longer taking any 312 /* Mark brk area as locked down and no longer taking any
312 new allocations */ 313 new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
331 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
332 PAGE_SIZE); 333 PAGE_SIZE);
333 334
334 if (ramdisk_here == MEMBLOCK_ERROR) 335 if (!ramdisk_here)
335 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
336 ramdisk_size); 337 ramdisk_size);
337 338
338 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the lowmem currently occupied by
339 the initrd, we rely on that fact to keep the data intact. */ 340 the initrd, we rely on that fact to keep the data intact. */
340 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); 341 memblock_reserve(ramdisk_here, area_size);
341 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
342 initrd_end = initrd_start + ramdisk_size; 343 initrd_end = initrd_start + ramdisk_size;
343 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 344 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 394 initrd_start = 0;
394 395
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 396 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_x86_free_range(ramdisk_image, ramdisk_end); 397 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
397 printk(KERN_ERR "initrd too large to handle, " 398 printk(KERN_ERR "initrd too large to handle, "
398 "disabling initrd\n"); 399 "disabling initrd\n");
399 return; 400 return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
416 417
417 relocate_initrd(); 418 relocate_initrd();
418 419
419 memblock_x86_free_range(ramdisk_image, ramdisk_end); 420 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
420} 421}
421#else 422#else
422static void __init reserve_initrd(void) 423static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
490{ 491{
491 struct setup_data *data; 492 struct setup_data *data;
492 u64 pa_data; 493 u64 pa_data;
493 char buf[32];
494 494
495 if (boot_params.hdr.version < 0x0209) 495 if (boot_params.hdr.version < 0x0209)
496 return; 496 return;
497 pa_data = boot_params.hdr.setup_data; 497 pa_data = boot_params.hdr.setup_data;
498 while (pa_data) { 498 while (pa_data) {
499 data = early_memremap(pa_data, sizeof(*data)); 499 data = early_memremap(pa_data, sizeof(*data));
500 sprintf(buf, "setup data %x", data->type); 500 memblock_reserve(pa_data, sizeof(*data) + data->len);
501 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
502 pa_data = data->next; 501 pa_data = data->next;
503 early_iounmap(data, sizeof(*data)); 502 early_iounmap(data, sizeof(*data));
504 } 503 }
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
554 crash_base = memblock_find_in_range(alignment, 553 crash_base = memblock_find_in_range(alignment,
555 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 554 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
556 555
557 if (crash_base == MEMBLOCK_ERROR) { 556 if (!crash_base) {
558 pr_info("crashkernel reservation failed - No suitable area found.\n"); 557 pr_info("crashkernel reservation failed - No suitable area found.\n");
559 return; 558 return;
560 } 559 }
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
568 return; 567 return;
569 } 568 }
570 } 569 }
571 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); 570 memblock_reserve(crash_base, crash_size);
572 571
573 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 572 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
574 "for crashkernel (System RAM: %ldMB)\n", 573 "for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
626 addr = find_ibft_region(&size); 625 addr = find_ibft_region(&size);
627 626
628 if (size) 627 if (size)
629 memblock_x86_reserve_range(addr, addr + size, "* ibft"); 628 memblock_reserve(addr, size);
630} 629}
631 630
632static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 631static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b49..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
14 14
15 /* Has to be in very low memory so we can execute real-mode AP code. */ 15 /* Has to be in very low memory so we can execute real-mode AP code. */
16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
17 if (mem == MEMBLOCK_ERROR) 17 if (!mem)
18 panic("Cannot allocate trampoline\n"); 18 panic("Cannot allocate trampoline\n");
19 19
20 x86_trampoline_base = __va(mem); 20 x86_trampoline_base = __va(mem);
21 memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); 21 memblock_reserve(mem, size);
22 22
23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", 23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
24 x86_trampoline_base, (unsigned long long)mem, size); 24 x86_trampoline_base, (unsigned long long)mem, size);
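The failure convention changes along with the arguments: MEMBLOCK_ERROR (which was defined as 0) is removed, so callers test the returned address directly. A hedged sketch of the find-then-reserve idiom after this patch; min_addr, max_addr, size and align are placeholders for whatever the caller needs:

	phys_addr_t where;

	/* memblock_find_in_range() now returns 0 when nothing fits */
	where = memblock_find_in_range(min_addr, max_addr, size, align);
	if (!where)
		return -ENOMEM;		/* or panic(), as the caller prefers */

	/* the find does not reserve by itself; that is still a separate step */
	memblock_reserve(where, size);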
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab4..23d8e5fecf76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
27obj-$(CONFIG_ACPI_NUMA) += srat.o 27obj-$(CONFIG_ACPI_NUMA) += srat.o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
29 29
30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
31
32obj-$(CONFIG_MEMTEST) += memtest.o 30obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 87488b93a65c..a298914058f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
67 good_end = max_pfn_mapped << PAGE_SHIFT; 67 good_end = max_pfn_mapped << PAGE_SHIFT;
68 68
69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
70 if (base == MEMBLOCK_ERROR) 70 if (!base)
71 panic("Cannot find space for the kernel page tables"); 71 panic("Cannot find space for the kernel page tables");
72 72
73 pgt_buf_start = base >> PAGE_SHIFT; 73 pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
80 80
81void __init native_pagetable_reserve(u64 start, u64 end) 81void __init native_pagetable_reserve(u64 start, u64 end)
82{ 82{
83 memblock_x86_reserve_range(start, end, "PGTABLE"); 83 memblock_reserve(start, end - start);
84} 84}
85 85
86struct map_range { 86struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
280 * so that they can be reused for other purposes. 280 * so that they can be reused for other purposes.
281 * 281 *
282 * On native it just means calling memblock_x86_reserve_range, on Xen it 282 * On native it just means calling memblock_reserve, on Xen it also
283 * also means marking RW the pagetable pages that we allocated before 283 * means marking RW the pagetable pages that we allocated before
284 * but that haven't been used. 284 * but that haven't been used.
285 * 285 *
286 * In fact on xen we mark RO the whole range pgt_buf_start - 286 * In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29f7c6d98179..0c1da394a634 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page)
427void __init add_highpages_with_active_regions(int nid, 427void __init add_highpages_with_active_regions(int nid,
428 unsigned long start_pfn, unsigned long end_pfn) 428 unsigned long start_pfn, unsigned long end_pfn)
429{ 429{
430 struct range *range; 430 phys_addr_t start, end;
431 int nr_range; 431 u64 i;
432 int i; 432
433 433 for_each_free_mem_range(i, nid, &start, &end, NULL) {
434 nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); 434 unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
435 435 start_pfn, end_pfn);
436 for (i = 0; i < nr_range; i++) { 436 unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
437 struct page *page; 437 start_pfn, end_pfn);
438 int node_pfn; 438 for ( ; pfn < e_pfn; pfn++)
439 439 if (pfn_valid(pfn))
440 for (node_pfn = range[i].start; node_pfn < range[i].end; 440 add_one_highpage_init(pfn_to_page(pfn));
441 node_pfn++) {
442 if (!pfn_valid(node_pfn))
443 continue;
444 page = pfn_to_page(node_pfn);
445 add_one_highpage_init(page);
446 }
447 } 441 }
448} 442}
449#else 443#else
@@ -650,18 +644,18 @@ void __init initmem_init(void)
650 highstart_pfn = highend_pfn = max_pfn; 644 highstart_pfn = highend_pfn = max_pfn;
651 if (max_pfn > max_low_pfn) 645 if (max_pfn > max_low_pfn)
652 highstart_pfn = max_low_pfn; 646 highstart_pfn = max_low_pfn;
653 memblock_x86_register_active_regions(0, 0, highend_pfn);
654 sparse_memory_present_with_active_regions(0);
655 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 647 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
656 pages_to_mb(highend_pfn - highstart_pfn)); 648 pages_to_mb(highend_pfn - highstart_pfn));
657 num_physpages = highend_pfn; 649 num_physpages = highend_pfn;
658 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 650 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
659#else 651#else
660 memblock_x86_register_active_regions(0, 0, max_low_pfn);
661 sparse_memory_present_with_active_regions(0);
662 num_physpages = max_low_pfn; 652 num_physpages = max_low_pfn;
663 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 653 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
664#endif 654#endif
655
656 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
657 sparse_memory_present_with_active_regions(0);
658
665#ifdef CONFIG_FLATMEM 659#ifdef CONFIG_FLATMEM
666 max_mapnr = num_physpages; 660 max_mapnr = num_physpages;
667#endif 661#endif
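The highmem loop above shows the pattern that replaces __get_free_all_memory_range(): walk every free memblock range, clamp it to the pfn window of interest, and act on whatever survives the clamp. A generic, hedged sketch of the same shape; lo_pfn, hi_pfn and handle_page() are placeholders, not kernel symbols:

	phys_addr_t start, end;
	u64 i;

	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
		/* clamp the physical range to the pfn window we care about */
		unsigned long pfn   = clamp_t(unsigned long, PFN_UP(start),
					      lo_pfn, hi_pfn);
		unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
					      lo_pfn, hi_pfn);

		for (; pfn < e_pfn; pfn++)
			if (pfn_valid(pfn))
				handle_page(pfn_to_page(pfn));
	}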
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa005bf0e..a8a56ce3a962 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
608#ifndef CONFIG_NUMA 608#ifndef CONFIG_NUMA
609void __init initmem_init(void) 609void __init initmem_init(void)
610{ 610{
611 memblock_x86_register_active_regions(0, 0, max_pfn); 611 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
612} 612}
613#endif 613#endif
614 614
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 992da5ec5a64..000000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,348 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bitops.h>
5#include <linux/memblock.h>
6#include <linux/bootmem.h>
7#include <linux/mm.h>
8#include <linux/range.h>
9
10/* Check for already reserved areas */
11bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
12{
13 struct memblock_region *r;
14 u64 addr = *addrp, last;
15 u64 size = *sizep;
16 bool changed = false;
17
18again:
19 last = addr + size;
20 for_each_memblock(reserved, r) {
21 if (last > r->base && addr < r->base) {
22 size = r->base - addr;
23 changed = true;
24 goto again;
25 }
26 if (last > (r->base + r->size) && addr < (r->base + r->size)) {
27 addr = round_up(r->base + r->size, align);
28 size = last - addr;
29 changed = true;
30 goto again;
31 }
32 if (last <= (r->base + r->size) && addr >= r->base) {
33 *sizep = 0;
34 return false;
35 }
36 }
37 if (changed) {
38 *addrp = addr;
39 *sizep = size;
40 }
41 return changed;
42}
43
44/*
45 * Find next free range after start, and size is returned in *sizep
46 */
47u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
48{
49 struct memblock_region *r;
50
51 for_each_memblock(memory, r) {
52 u64 ei_start = r->base;
53 u64 ei_last = ei_start + r->size;
54 u64 addr;
55
56 addr = round_up(ei_start, align);
57 if (addr < start)
58 addr = round_up(start, align);
59 if (addr >= ei_last)
60 continue;
61 *sizep = ei_last - addr;
62 while (memblock_x86_check_reserved_size(&addr, sizep, align))
63 ;
64
65 if (*sizep)
66 return addr;
67 }
68
69 return MEMBLOCK_ERROR;
70}
71
72static __init struct range *find_range_array(int count)
73{
74 u64 end, size, mem;
75 struct range *range;
76
77 size = sizeof(struct range) * count;
78 end = memblock.current_limit;
79
80 mem = memblock_find_in_range(0, end, size, sizeof(struct range));
81 if (mem == MEMBLOCK_ERROR)
82 panic("can not find more space for range array");
83
84 /*
 85 * This range is temporary, so don't reserve it; it will not be
 86 * overlapped because we will not allocate a new buffer before
 87 * we discard this one.
88 */
89 range = __va(mem);
90 memset(range, 0, size);
91
92 return range;
93}
94
95static void __init memblock_x86_subtract_reserved(struct range *range, int az)
96{
97 u64 final_start, final_end;
98 struct memblock_region *r;
99
100 /* Take out region array itself at first*/
101 memblock_free_reserved_regions();
102
103 memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
104
105 for_each_memblock(reserved, r) {
106 memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
107 final_start = PFN_DOWN(r->base);
108 final_end = PFN_UP(r->base + r->size);
109 if (final_start >= final_end)
110 continue;
111 subtract_range(range, az, final_start, final_end);
112 }
113
114 /* Put region array back ? */
115 memblock_reserve_reserved_regions();
116}
117
118struct count_data {
119 int nr;
120};
121
122static int __init count_work_fn(unsigned long start_pfn,
123 unsigned long end_pfn, void *datax)
124{
125 struct count_data *data = datax;
126
127 data->nr++;
128
129 return 0;
130}
131
132static int __init count_early_node_map(int nodeid)
133{
134 struct count_data data;
135
136 data.nr = 0;
137 work_with_active_regions(nodeid, count_work_fn, &data);
138
139 return data.nr;
140}
141
142int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
143 unsigned long start_pfn, unsigned long end_pfn)
144{
145 int count;
146 struct range *range;
147 int nr_range;
148
149 count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
150
151 range = find_range_array(count);
152 nr_range = 0;
153
154 /*
155 * Use early_node_map[] and memblock.reserved.region to get range array
156 * at first
157 */
158 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
159 subtract_range(range, count, 0, start_pfn);
160 subtract_range(range, count, end_pfn, -1ULL);
161
162 memblock_x86_subtract_reserved(range, count);
163 nr_range = clean_sort_range(range, count);
164
165 *rangep = range;
166 return nr_range;
167}
168
169int __init get_free_all_memory_range(struct range **rangep, int nodeid)
170{
171 unsigned long end_pfn = -1UL;
172
173#ifdef CONFIG_X86_32
174 end_pfn = max_low_pfn;
175#endif
176 return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
177}
178
179static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
180{
181 int i, count;
182 struct range *range;
183 int nr_range;
184 u64 final_start, final_end;
185 u64 free_size;
186 struct memblock_region *r;
187
188 count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
189
190 range = find_range_array(count);
191 nr_range = 0;
192
193 addr = PFN_UP(addr);
194 limit = PFN_DOWN(limit);
195
196 for_each_memblock(memory, r) {
197 final_start = PFN_UP(r->base);
198 final_end = PFN_DOWN(r->base + r->size);
199 if (final_start >= final_end)
200 continue;
201 if (final_start >= limit || final_end <= addr)
202 continue;
203
204 nr_range = add_range(range, count, nr_range, final_start, final_end);
205 }
206 subtract_range(range, count, 0, addr);
207 subtract_range(range, count, limit, -1ULL);
208
209 /* Subtract memblock.reserved.region in range ? */
210 if (!get_free)
211 goto sort_and_count_them;
212 for_each_memblock(reserved, r) {
213 final_start = PFN_DOWN(r->base);
214 final_end = PFN_UP(r->base + r->size);
215 if (final_start >= final_end)
216 continue;
217 if (final_start >= limit || final_end <= addr)
218 continue;
219
220 subtract_range(range, count, final_start, final_end);
221 }
222
223sort_and_count_them:
224 nr_range = clean_sort_range(range, count);
225
226 free_size = 0;
227 for (i = 0; i < nr_range; i++)
228 free_size += range[i].end - range[i].start;
229
230 return free_size << PAGE_SHIFT;
231}
232
233u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
234{
235 return __memblock_x86_memory_in_range(addr, limit, true);
236}
237
238u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
239{
240 return __memblock_x86_memory_in_range(addr, limit, false);
241}
242
243void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
244{
245 if (start == end)
246 return;
247
248 if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
249 return;
250
251 memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
252
253 memblock_reserve(start, end - start);
254}
255
256void __init memblock_x86_free_range(u64 start, u64 end)
257{
258 if (start == end)
259 return;
260
261 if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
262 return;
263
264 memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
265
266 memblock_free(start, end - start);
267}
268
269/*
270 * Need to call this function after memblock_x86_register_active_regions,
271 * so early_node_map[] is filled already.
272 */
273u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
274{
275 u64 addr;
276 addr = find_memory_core_early(nid, size, align, start, end);
277 if (addr != MEMBLOCK_ERROR)
278 return addr;
279
280 /* Fallback, should already have start end within node range */
281 return memblock_find_in_range(start, end, size, align);
282}
283
284/*
285 * Finds an active region in the address range from start_pfn to last_pfn and
286 * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
287 */
288static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
289 unsigned long start_pfn,
290 unsigned long last_pfn,
291 unsigned long *ei_startpfn,
292 unsigned long *ei_endpfn)
293{
294 u64 align = PAGE_SIZE;
295
296 *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
297 *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
298
299 /* Skip map entries smaller than a page */
300 if (*ei_startpfn >= *ei_endpfn)
301 return 0;
302
303 /* Skip if map is outside the node */
304 if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
305 return 0;
306
307 /* Check for overlaps */
308 if (*ei_startpfn < start_pfn)
309 *ei_startpfn = start_pfn;
310 if (*ei_endpfn > last_pfn)
311 *ei_endpfn = last_pfn;
312
313 return 1;
314}
315
316/* Walk the memblock.memory map and register active regions within a node */
317void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
318 unsigned long last_pfn)
319{
320 unsigned long ei_startpfn;
321 unsigned long ei_endpfn;
322 struct memblock_region *r;
323
324 for_each_memblock(memory, r)
325 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
326 &ei_startpfn, &ei_endpfn))
327 add_active_range(nid, ei_startpfn, ei_endpfn);
328}
329
330/*
331 * Find the hole size (in bytes) in the memory range.
332 * @start: starting address of the memory range to scan
333 * @end: ending address of the memory range to scan
334 */
335u64 __init memblock_x86_hole_size(u64 start, u64 end)
336{
337 unsigned long start_pfn = start >> PAGE_SHIFT;
338 unsigned long last_pfn = end >> PAGE_SHIFT;
339 unsigned long ei_startpfn, ei_endpfn, ram = 0;
340 struct memblock_region *r;
341
342 for_each_memblock(memory, r)
343 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
344 &ei_startpfn, &ei_endpfn))
345 ram += ei_endpfn - ei_startpfn;
346
347 return end - start - ((u64)ram << PAGE_SHIFT);
348}
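Everything the deleted file provided can be rebuilt from the generic interfaces this patch introduces, which is why it goes away wholesale. As one example, the role of memblock_x86_free_memory_in_range(), counting free bytes inside a window, reduces to an iterator walk. A hedged sketch, not byte-for-byte equivalent to the removed code (the original also rounded to page boundaries):

	static u64 __init free_bytes_in_range(phys_addr_t addr, phys_addr_t limit)
	{
		phys_addr_t start, end;
		u64 i, bytes = 0;

		for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
			start = clamp(start, addr, limit);
			end = clamp(end, addr, limit);
			if (start < end)
				bytes += end - start;
		}
		return bytes;
	}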
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 92faf3a1c53e..c80b9fb95734 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
34 (unsigned long long) pattern, 34 (unsigned long long) pattern,
35 (unsigned long long) start_bad, 35 (unsigned long long) start_bad,
36 (unsigned long long) end_bad); 36 (unsigned long long) end_bad);
37 memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); 37 memblock_reserve(start_bad, end_bad - start_bad);
38} 38}
39 39
40static void __init memtest(u64 pattern, u64 start_phys, u64 size) 40static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
70 70
71static void __init do_one_pass(u64 pattern, u64 start, u64 end) 71static void __init do_one_pass(u64 pattern, u64 start, u64 end)
72{ 72{
73 u64 size = 0; 73 u64 i;
74 74 phys_addr_t this_start, this_end;
75 while (start < end) { 75
76 start = memblock_x86_find_in_range_size(start, &size, 1); 76 for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
77 77 this_start = clamp_t(phys_addr_t, this_start, start, end);
78 /* done ? */ 78 this_end = clamp_t(phys_addr_t, this_end, start, end);
79 if (start >= end) 79 if (this_start < this_end) {
80 break; 80 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
81 if (start + size > end) 81 (unsigned long long)this_start,
82 size = end - start; 82 (unsigned long long)this_end,
83 83 (unsigned long long)cpu_to_be64(pattern));
84 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", 84 memtest(pattern, this_start, this_end - this_start);
85 (unsigned long long) start, 85 }
86 (unsigned long long) start + size,
87 (unsigned long long) cpu_to_be64(pattern));
88 memtest(pattern, start, size);
89
90 start += size;
91 } 86 }
92} 87}
93 88
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fbeaaf416610..496f494593bf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
192/* Initialize NODE_DATA for a node on the local memory */ 192/* Initialize NODE_DATA for a node on the local memory */
193static void __init setup_node_data(int nid, u64 start, u64 end) 193static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 194{
195 const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
196 const u64 nd_high = PFN_PHYS(max_pfn_mapped);
197 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
198 bool remapped = false; 196 bool remapped = false;
199 u64 nd_pa; 197 u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
224 nd_pa = __pa(nd); 222 nd_pa = __pa(nd);
225 remapped = true; 223 remapped = true;
226 } else { 224 } else {
227 nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, 225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
228 nd_size, SMP_CACHE_BYTES); 226 if (!nd_pa) {
229 if (nd_pa == MEMBLOCK_ERROR)
230 nd_pa = memblock_find_in_range(nd_low, nd_high,
231 nd_size, SMP_CACHE_BYTES);
232 if (nd_pa == MEMBLOCK_ERROR) {
233 pr_err("Cannot find %zu bytes in node %d\n", 227 pr_err("Cannot find %zu bytes in node %d\n",
234 nd_size, nid); 228 nd_size, nid);
235 return; 229 return;
236 } 230 }
237 memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
238 nd = __va(nd_pa); 231 nd = __va(nd_pa);
239 } 232 }
240 233
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void)
371 364
372 /* numa_distance could be 1LU marking allocation failure, test cnt */ 365 /* numa_distance could be 1LU marking allocation failure, test cnt */
373 if (numa_distance_cnt) 366 if (numa_distance_cnt)
374 memblock_x86_free_range(__pa(numa_distance), 367 memblock_free(__pa(numa_distance), size);
375 __pa(numa_distance) + size);
376 numa_distance_cnt = 0; 368 numa_distance_cnt = 0;
377 numa_distance = NULL; /* enable table creation */ 369 numa_distance = NULL; /* enable table creation */
378} 370}
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void)
395 387
396 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 388 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
397 size, PAGE_SIZE); 389 size, PAGE_SIZE);
398 if (phys == MEMBLOCK_ERROR) { 390 if (!phys) {
399 pr_warning("NUMA: Warning: can't allocate distance table!\n"); 391 pr_warning("NUMA: Warning: can't allocate distance table!\n");
400 /* don't retry until explicitly reset */ 392 /* don't retry until explicitly reset */
401 numa_distance = (void *)1LU; 393 numa_distance = (void *)1LU;
402 return -ENOMEM; 394 return -ENOMEM;
403 } 395 }
404 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); 396 memblock_reserve(phys, size);
405 397
406 numa_distance = __va(phys); 398 numa_distance = __va(phys);
407 numa_distance_cnt = cnt; 399 numa_distance_cnt = cnt;
@@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
482 numaram = 0; 474 numaram = 0;
483 } 475 }
484 476
485 e820ram = max_pfn - (memblock_x86_hole_size(0, 477 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
486 PFN_PHYS(max_pfn)) >> PAGE_SHIFT); 478
487 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ 479 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
488 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { 480 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
489 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", 481 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
505 if (WARN_ON(nodes_empty(node_possible_map))) 497 if (WARN_ON(nodes_empty(node_possible_map)))
506 return -EINVAL; 498 return -EINVAL;
507 499
508 for (i = 0; i < mi->nr_blks; i++) 500 for (i = 0; i < mi->nr_blks; i++) {
509 memblock_x86_register_active_regions(mi->blk[i].nid, 501 struct numa_memblk *mb = &mi->blk[i];
510 mi->blk[i].start >> PAGE_SHIFT, 502 memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
511 mi->blk[i].end >> PAGE_SHIFT); 503 }
512
513 /* for out of order entries */
514 sort_node_map();
515 504
516 /* 505 /*
517 * If sections array is gonna be used for pfn -> nid mapping, check 506 * If sections array is gonna be used for pfn -> nid mapping, check
@@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
545 setup_node_data(nid, start, end); 534 setup_node_data(nid, start, end);
546 } 535 }
547 536
537 /* Dump memblock with node info and return. */
538 memblock_dump_all();
548 return 0; 539 return 0;
549} 540}
550 541
@@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void))
582 nodes_clear(node_possible_map); 573 nodes_clear(node_possible_map);
583 nodes_clear(node_online_map); 574 nodes_clear(node_online_map);
584 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 575 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
585 remove_all_active_ranges(); 576 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
586 numa_reset_distance(); 577 numa_reset_distance();
587 578
588 ret = init_func(); 579 ret = init_func();
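The NUMA path now leans on two generic primitives: memblock_set_node() tags a physical range with its node id, and memblock_alloc_nid() hands back node-local early memory that is already reserved, which is why the explicit reserve call above disappears. A compressed, hedged sketch of that pairing; blk_start, blk_end, nd_size and nid are placeholders:

	/* tag each detected block with its node ... */
	memblock_set_node(blk_start, blk_end - blk_start, nid);

	/* ... then ask for node-local memory; a zero return means failure */
	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
	if (!nd_pa) {
		pr_err("no %zu bytes available on node %d\n", nd_size, nid);
		return;
	}
	nd = __va(nd_pa);	/* already reserved by memblock_alloc_nid() */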
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3adebe7e536a..534255a36b6b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
199 199
200 /* allocate node memory and the lowmem remap area */ 200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); 201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (node_pa == MEMBLOCK_ERROR) { 202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", 203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid); 204 size, nid);
205 return; 205 return;
206 } 206 }
207 memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); 207 memblock_reserve(node_pa, size);
208 208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, 209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT, 210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES); 211 size, LARGE_PAGE_BYTES);
212 if (remap_pa == MEMBLOCK_ERROR) { 212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", 213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid); 214 size, nid);
215 memblock_x86_free_range(node_pa, node_pa + size); 215 memblock_free(node_pa, size);
216 return; 216 return;
217 } 217 }
218 memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); 218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa); 219 remap_va = phys_to_virt(remap_pa);
220 220
221 /* perform actual remap */ 221 /* perform actual remap */
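init_alloc_remap() above also shows the cleanup side of the new calls: a later failure undoes an earlier reservation with memblock_free(), which, like memblock_reserve(), now takes base and size. A hedged sketch of that allocate-with-rollback shape; the bounds, size and align are placeholders:

	phys_addr_t first, second;

	first = memblock_find_in_range(lo, hi, size, align);
	if (!first)
		return;
	memblock_reserve(first, size);

	second = memblock_find_in_range(lo2, hi2, size, align);
	if (!second) {
		memblock_free(first, size);	/* roll back the first reservation */
		return;
	}
	memblock_reserve(second, size);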
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a0..92e27119ee1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
19 for_each_online_node(i) 19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i)); 20 pages += free_all_bootmem_node(NODE_DATA(i));
21 21
22 pages += free_all_memory_core_early(MAX_NUMNODES); 22 pages += free_low_memory_core_early(MAX_NUMNODES);
23 23
24 return pages; 24 return pages;
25} 25}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index d0ed086b6247..46db56845f18 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end)
32{
33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end);
35
36 if (start_pfn < end_pfn)
37 return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
38 return 0;
39}
40
31/* 41/*
32 * Sets up nid to range from @start to @end. The return value is -errno if 42 * Sets up nid to range from @start to @end. The return value is -errno if
33 * something went wrong, 0 otherwise. 43 * something went wrong, 0 otherwise.
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
89 * Calculate target node size. x86_32 freaks on __udivdi3() so do 99 * Calculate target node size. x86_32 freaks on __udivdi3() so do
90 * the division in ulong number of pages and convert back. 100 * the division in ulong number of pages and convert back.
91 */ 101 */
92 size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); 102 size = max_addr - addr - mem_hole_size(addr, max_addr);
93 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); 103 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
94 104
95 /* 105 /*
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
135 * Continue to add memory to this fake node if its 145 * Continue to add memory to this fake node if its
136 * non-reserved memory is less than the per-node size. 146 * non-reserved memory is less than the per-node size.
137 */ 147 */
138 while (end - start - 148 while (end - start - mem_hole_size(start, end) < size) {
139 memblock_x86_hole_size(start, end) < size) {
140 end += FAKE_NODE_MIN_SIZE; 149 end += FAKE_NODE_MIN_SIZE;
141 if (end > limit) { 150 if (end > limit) {
142 end = limit; 151 end = limit;
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
150 * this one must extend to the boundary. 159 * this one must extend to the boundary.
151 */ 160 */
152 if (end < dma32_end && dma32_end - end - 161 if (end < dma32_end && dma32_end - end -
153 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 162 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
154 end = dma32_end; 163 end = dma32_end;
155 164
156 /* 165 /*
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
158 * next node, this one must extend to the end of the 167 * next node, this one must extend to the end of the
159 * physical node. 168 * physical node.
160 */ 169 */
161 if (limit - end - 170 if (limit - end - mem_hole_size(end, limit) < size)
162 memblock_x86_hole_size(end, limit) < size)
163 end = limit; 171 end = limit;
164 172
165 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, 173 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
180{ 188{
181 u64 end = start + size; 189 u64 end = start + size;
182 190
183 while (end - start - memblock_x86_hole_size(start, end) < size) { 191 while (end - start - mem_hole_size(start, end) < size) {
184 end += FAKE_NODE_MIN_SIZE; 192 end += FAKE_NODE_MIN_SIZE;
185 if (end > max_addr) { 193 if (end > max_addr) {
186 end = max_addr; 194 end = max_addr;
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
211 * creates a uniform distribution of node sizes across the entire 219 * creates a uniform distribution of node sizes across the entire
212 * machine (but not necessarily over physical nodes). 220 * machine (but not necessarily over physical nodes).
213 */ 221 */
214 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / 222 min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
215 MAX_NUMNODES;
216 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 223 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
217 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 224 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
218 min_size = (min_size + FAKE_NODE_MIN_SIZE) & 225 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
252 * this one must extend to the boundary. 259 * this one must extend to the boundary.
253 */ 260 */
254 if (end < dma32_end && dma32_end - end - 261 if (end < dma32_end && dma32_end - end -
255 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 262 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
256 end = dma32_end; 263 end = dma32_end;
257 264
258 /* 265 /*
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
260 * next node, this one must extend to the end of the 267 * next node, this one must extend to the end of the
261 * physical node. 268 * physical node.
262 */ 269 */
263 if (limit - end - 270 if (limit - end - mem_hole_size(end, limit) < size)
264 memblock_x86_hole_size(end, limit) < size)
265 end = limit; 271 end = limit;
266 272
267 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, 273 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
351 357
352 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 358 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
353 phys_size, PAGE_SIZE); 359 phys_size, PAGE_SIZE);
354 if (phys == MEMBLOCK_ERROR) { 360 if (!phys) {
355 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); 361 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
356 goto no_emu; 362 goto no_emu;
357 } 363 }
358 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); 364 memblock_reserve(phys, phys_size);
359 phys_dist = __va(phys); 365 phys_dist = __va(phys);
360 366
361 for (i = 0; i < numa_dist_cnt; i++) 367 for (i = 0; i < numa_dist_cnt; i++)
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
424 430
425 /* free the copied physical distance table */ 431 /* free the copied physical distance table */
426 if (phys_dist) 432 if (phys_dist)
427 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); 433 memblock_free(__pa(phys_dist), phys_size);
428 return; 434 return;
429 435
430no_emu: 436no_emu:
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053d..4a01967f02e7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -352,8 +352,7 @@ void __init efi_memblock_x86_reserve_range(void)
352 boot_params.efi_info.efi_memdesc_size; 352 boot_params.efi_info.efi_memdesc_size;
353 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 353 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
354 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 354 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
355 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, 355 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
356 "EFI memmap");
357} 356}
358 357
359#if EFI_DEBUG 358#if EFI_DEBUG
@@ -397,16 +396,14 @@ void __init efi_reserve_boot_services(void)
397 if ((start+size >= virt_to_phys(_text) 396 if ((start+size >= virt_to_phys(_text)
398 && start <= virt_to_phys(_end)) || 397 && start <= virt_to_phys(_end)) ||
399 !e820_all_mapped(start, start+size, E820_RAM) || 398 !e820_all_mapped(start, start+size, E820_RAM) ||
400 memblock_x86_check_reserved_size(&start, &size, 399 memblock_is_region_reserved(start, size)) {
401 1<<EFI_PAGE_SHIFT)) {
402 /* Could not reserve, skip it */ 400 /* Could not reserve, skip it */
403 md->num_pages = 0; 401 md->num_pages = 0;
404 memblock_dbg(PFX "Could not reserve boot range " 402 memblock_dbg(PFX "Could not reserve boot range "
405 "[0x%010llx-0x%010llx]\n", 403 "[0x%010llx-0x%010llx]\n",
406 start, start+size-1); 404 start, start+size-1);
407 } else 405 } else
408 memblock_x86_reserve_range(start, start+size, 406 memblock_reserve(start, size);
409 "EFI Boot");
410 } 407 }
411} 408}
412 409
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c338..12eb07bfb267 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
1215 local_irq_disable(); 1215 local_irq_disable();
1216 early_boot_irqs_disabled = true; 1216 early_boot_irqs_disabled = true;
1217 1217
1218 memblock_init();
1219
1220 xen_raw_console_write("mapping kernel into physical memory\n"); 1218 xen_raw_console_write("mapping kernel into physical memory\n");
1221 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1219 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1222 xen_ident_map_ISA(); 1220 xen_ident_map_ISA();
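The memblock_init() call deleted here gets no replacement: judging by the include/linux/memblock.h hunk further down, memblock's region arrays are now statically initialized and the old memblock_analyze() step becomes an explicit memblock_allow_resize() opt-in. A hedged sketch of the resulting early-boot ordering; ram_base, ram_size, kernel_base and kernel_size are placeholders:

	/* no memblock_init(): the static region arrays are usable immediately */
	memblock_add(ram_base, ram_size);		/* describe RAM */
	memblock_reserve(kernel_base, kernel_size);	/* protect the kernel image */
	memblock_allow_resize();	/* only from here on may the arrays grow */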
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673b1207..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1774 __xen_write_cr3(true, __pa(pgd)); 1774 __xen_write_cr3(true, __pa(pgd));
1775 xen_mc_issue(PARAVIRT_LAZY_CPU); 1775 xen_mc_issue(PARAVIRT_LAZY_CPU);
1776 1776
1777 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1777 memblock_reserve(__pa(xen_start_info->pt_base),
1778 __pa(xen_start_info->pt_base + 1778 xen_start_info->nr_pt_frames * PAGE_SIZE);
1779 xen_start_info->nr_pt_frames * PAGE_SIZE),
1780 "XEN PAGETABLES");
1781 1779
1782 return pgd; 1780 return pgd;
1783} 1781}
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1853 PFN_DOWN(__pa(initial_page_table))); 1851 PFN_DOWN(__pa(initial_page_table)));
1854 xen_write_cr3(__pa(initial_page_table)); 1852 xen_write_cr3(__pa(initial_page_table));
1855 1853
1856 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1854 memblock_reserve(__pa(xen_start_info->pt_base),
 1857 __pa(xen_start_info->pt_base + 1855 xen_start_info->nr_pt_frames * PAGE_SIZE);
1858 xen_start_info->nr_pt_frames * PAGE_SIZE),
1859 "XEN PAGETABLES");
1860 1856
1861 return initial_page_table; 1857 return initial_page_table;
1862} 1858}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b2c7179fa263..e03c63692176 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
75 if (i == XEN_EXTRA_MEM_MAX_REGIONS) 75 if (i == XEN_EXTRA_MEM_MAX_REGIONS)
76 printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 76 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
77 77
78 memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); 78 memblock_reserve(start, size);
79 79
80 xen_max_p2m_pfn = PFN_DOWN(start + size); 80 xen_max_p2m_pfn = PFN_DOWN(start + size);
81 81
@@ -311,9 +311,8 @@ char * __init xen_memory_setup(void)
311 * - xen_start_info 311 * - xen_start_info
312 * See comment above "struct start_info" in <xen/interface/xen.h> 312 * See comment above "struct start_info" in <xen/interface/xen.h>
313 */ 313 */
314 memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), 314 memblock_reserve(__pa(xen_start_info->mfn_list),
315 __pa(xen_start_info->pt_base), 315 xen_start_info->pt_base - xen_start_info->mfn_list);
316 "XEN START INFO");
317 316
318 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 317 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
319 318
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bdc447fd4766..31053a951c34 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -41,6 +41,7 @@
41#include <linux/tboot.h> 41#include <linux/tboot.h>
42#include <linux/dmi.h> 42#include <linux/dmi.h>
43#include <linux/pci-ats.h> 43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
44#include <asm/cacheflush.h> 45#include <asm/cacheflush.h>
45#include <asm/iommu.h> 46#include <asm/iommu.h>
46 47
@@ -2188,18 +2189,6 @@ static inline void iommu_prepare_isa(void)
2188 2189
2189static int md_domain_init(struct dmar_domain *domain, int guest_width); 2190static int md_domain_init(struct dmar_domain *domain, int guest_width);
2190 2191
2191static int __init si_domain_work_fn(unsigned long start_pfn,
2192 unsigned long end_pfn, void *datax)
2193{
2194 int *ret = datax;
2195
2196 *ret = iommu_domain_identity_map(si_domain,
2197 (uint64_t)start_pfn << PAGE_SHIFT,
2198 (uint64_t)end_pfn << PAGE_SHIFT);
2199 return *ret;
2200
2201}
2202
2203static int __init si_domain_init(int hw) 2192static int __init si_domain_init(int hw)
2204{ 2193{
2205 struct dmar_drhd_unit *drhd; 2194 struct dmar_drhd_unit *drhd;
@@ -2231,9 +2220,15 @@ static int __init si_domain_init(int hw)
2231 return 0; 2220 return 0;
2232 2221
2233 for_each_online_node(nid) { 2222 for_each_online_node(nid) {
2234 work_with_active_regions(nid, si_domain_work_fn, &ret); 2223 unsigned long start_pfn, end_pfn;
2235 if (ret) 2224 int i;
2236 return ret; 2225
2226 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2227 ret = iommu_domain_identity_map(si_domain,
2228 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2229 if (ret)
2230 return ret;
2231 }
2237 } 2232 }
2238 2233
2239 return 0; 2234 return 0;
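The identity-map setup no longer needs the callback-style work_with_active_regions(): for_each_mem_pfn_range(), declared in the memblock.h hunk below, yields each registered pfn range directly. A hedged usage sketch with a placeholder action (the pr_info() stands in for real per-range work):

	unsigned long start_pfn, end_pfn;
	int i, nid;

	/* MAX_NUMNODES selects every node; &nid reports the node of each range */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
		pr_info("node %d: pfns %lu-%lu\n", nid, start_pfn, end_pfn);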
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index ab344a521105..66d3e954eb6c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -44,7 +44,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
44 unsigned long endpfn); 44 unsigned long endpfn);
45extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); 45extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
46 46
47unsigned long free_all_memory_core_early(int nodeid); 47extern unsigned long free_low_memory_core_early(int nodeid);
48extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); 48extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
49extern unsigned long free_all_bootmem(void); 49extern unsigned long free_all_bootmem(void);
50 50
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e6b843e16e81..a6bb10235148 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,8 +2,6 @@
2#define _LINUX_MEMBLOCK_H 2#define _LINUX_MEMBLOCK_H
3#ifdef __KERNEL__ 3#ifdef __KERNEL__
4 4
5#define MEMBLOCK_ERROR 0
6
7#ifdef CONFIG_HAVE_MEMBLOCK 5#ifdef CONFIG_HAVE_MEMBLOCK
8/* 6/*
9 * Logical memory blocks. 7 * Logical memory blocks.
@@ -19,81 +17,161 @@
19#include <linux/init.h> 17#include <linux/init.h>
20#include <linux/mm.h> 18#include <linux/mm.h>
21 19
22#include <asm/memblock.h>
23
24#define INIT_MEMBLOCK_REGIONS 128 20#define INIT_MEMBLOCK_REGIONS 128
25 21
26struct memblock_region { 22struct memblock_region {
27 phys_addr_t base; 23 phys_addr_t base;
28 phys_addr_t size; 24 phys_addr_t size;
25#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
26 int nid;
27#endif
29}; 28};
30 29
31struct memblock_type { 30struct memblock_type {
32 unsigned long cnt; /* number of regions */ 31 unsigned long cnt; /* number of regions */
33 unsigned long max; /* size of the allocated array */ 32 unsigned long max; /* size of the allocated array */
33 phys_addr_t total_size; /* size of all regions */
34 struct memblock_region *regions; 34 struct memblock_region *regions;
35}; 35};
36 36
37struct memblock { 37struct memblock {
38 phys_addr_t current_limit; 38 phys_addr_t current_limit;
39 phys_addr_t memory_size; /* Updated by memblock_analyze() */
40 struct memblock_type memory; 39 struct memblock_type memory;
41 struct memblock_type reserved; 40 struct memblock_type reserved;
42}; 41};
43 42
44extern struct memblock memblock; 43extern struct memblock memblock;
45extern int memblock_debug; 44extern int memblock_debug;
46extern int memblock_can_resize;
47 45
48#define memblock_dbg(fmt, ...) \ 46#define memblock_dbg(fmt, ...) \
49 if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) 47 if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
50 48
51u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); 49phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
50 phys_addr_t size, phys_addr_t align, int nid);
51phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
52 phys_addr_t size, phys_addr_t align);
52int memblock_free_reserved_regions(void); 53int memblock_free_reserved_regions(void);
53int memblock_reserve_reserved_regions(void); 54int memblock_reserve_reserved_regions(void);
54 55
55extern void memblock_init(void); 56void memblock_allow_resize(void);
56extern void memblock_analyze(void); 57int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
57extern long memblock_add(phys_addr_t base, phys_addr_t size); 58int memblock_add(phys_addr_t base, phys_addr_t size);
58extern long memblock_remove(phys_addr_t base, phys_addr_t size); 59int memblock_remove(phys_addr_t base, phys_addr_t size);
59extern long memblock_free(phys_addr_t base, phys_addr_t size); 60int memblock_free(phys_addr_t base, phys_addr_t size);
60extern long memblock_reserve(phys_addr_t base, phys_addr_t size); 61int memblock_reserve(phys_addr_t base, phys_addr_t size);
62
63#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
64void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
65 unsigned long *out_end_pfn, int *out_nid);
66
67/**
68 * for_each_mem_pfn_range - early memory pfn range iterator
69 * @i: an integer used as loop variable
70 * @nid: node selector, %MAX_NUMNODES for all nodes
71 * @p_start: ptr to ulong for start pfn of the range, can be %NULL
72 * @p_end: ptr to ulong for end pfn of the range, can be %NULL
73 * @p_nid: ptr to int for nid of the range, can be %NULL
74 *
75 * Walks over configured memory ranges. Available after early_node_map is
76 * populated.
77 */
78#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
79 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
80 i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
81#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
82
83void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
84 phys_addr_t *out_end, int *out_nid);
85
86/**
87 * for_each_free_mem_range - iterate through free memblock areas
88 * @i: u64 used as loop variable
89 * @nid: node selector, %MAX_NUMNODES for all nodes
90 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
91 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
92 * @p_nid: ptr to int for nid of the range, can be %NULL
93 *
94 * Walks over free (memory && !reserved) areas of memblock. Available as
95 * soon as memblock is initialized.
96 */
97#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \
98 for (i = 0, \
99 __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \
100 i != (u64)ULLONG_MAX; \
101 __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
102
103void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
104 phys_addr_t *out_end, int *out_nid);
61 105
62/* The numa aware allocator is only available if 106/**
63 * CONFIG_ARCH_POPULATES_NODE_MAP is set 107 * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
108 * @i: u64 used as loop variable
109 * @nid: node selector, %MAX_NUMNODES for all nodes
110 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
111 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
112 * @p_nid: ptr to int for nid of the range, can be %NULL
113 *
114 * Walks over free (memory && !reserved) areas of memblock in reverse
115 * order. Available as soon as memblock is initialized.
64 */ 116 */
65extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, 117#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \
66 int nid); 118 for (i = (u64)ULLONG_MAX, \
67extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, 119 __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \
68 int nid); 120 i != (u64)ULLONG_MAX; \
121 __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
69 122
70extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); 123#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
124int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
125
126static inline void memblock_set_region_node(struct memblock_region *r, int nid)
127{
128 r->nid = nid;
129}
130
131static inline int memblock_get_region_node(const struct memblock_region *r)
132{
133 return r->nid;
134}
135#else
136static inline void memblock_set_region_node(struct memblock_region *r, int nid)
137{
138}
139
140static inline int memblock_get_region_node(const struct memblock_region *r)
141{
142 return 0;
143}
144#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
145
146phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
147phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
148
149phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
71 150
 72/* Flags for memblock_alloc_base() and __memblock_alloc_base() */ 151/* Flags for memblock_alloc_base() and __memblock_alloc_base() */
73#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) 152#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
74#define MEMBLOCK_ALLOC_ACCESSIBLE 0 153#define MEMBLOCK_ALLOC_ACCESSIBLE 0
75 154
76extern phys_addr_t memblock_alloc_base(phys_addr_t size, 155phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
77 phys_addr_t align, 156 phys_addr_t max_addr);
78 phys_addr_t max_addr); 157phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
79extern phys_addr_t __memblock_alloc_base(phys_addr_t size, 158 phys_addr_t max_addr);
80 phys_addr_t align, 159phys_addr_t memblock_phys_mem_size(void);
81 phys_addr_t max_addr); 160phys_addr_t memblock_start_of_DRAM(void);
82extern phys_addr_t memblock_phys_mem_size(void); 161phys_addr_t memblock_end_of_DRAM(void);
83extern phys_addr_t memblock_start_of_DRAM(void); 162void memblock_enforce_memory_limit(phys_addr_t memory_limit);
84extern phys_addr_t memblock_end_of_DRAM(void); 163int memblock_is_memory(phys_addr_t addr);
85extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); 164int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
86extern int memblock_is_memory(phys_addr_t addr); 165int memblock_is_reserved(phys_addr_t addr);
87extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); 166int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
88extern int memblock_is_reserved(phys_addr_t addr); 167
89extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); 168extern void __memblock_dump_all(void);
90 169
91extern void memblock_dump_all(void); 170static inline void memblock_dump_all(void)
92 171{
93/* Provided by the architecture */ 172 if (memblock_debug)
94extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); 173 __memblock_dump_all();
95extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, 174}
96 phys_addr_t addr2, phys_addr_t size2);
97 175
98/** 176/**
99 * memblock_set_current_limit - Set the current allocation limit to allow 177 * memblock_set_current_limit - Set the current allocation limit to allow
@@ -101,7 +179,7 @@ extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
101 * accessible during boot 179 * accessible during boot
102 * @limit: New limit value (physical address) 180 * @limit: New limit value (physical address)
103 */ 181 */
104extern void memblock_set_current_limit(phys_addr_t limit); 182void memblock_set_current_limit(phys_addr_t limit);
105 183
106 184
107/* 185/*
@@ -154,9 +232,9 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
154 region++) 232 region++)
155 233
156 234
157#ifdef ARCH_DISCARD_MEMBLOCK 235#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
158#define __init_memblock __init 236#define __init_memblock __meminit
159#define __initdata_memblock __initdata 237#define __initdata_memblock __meminitdata
160#else 238#else
161#define __init_memblock 239#define __init_memblock
162#define __initdata_memblock 240#define __initdata_memblock
@@ -165,7 +243,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
165#else 243#else
166static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) 244static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
167{ 245{
168 return MEMBLOCK_ERROR; 246 return 0;
169} 247}
170 248
171#endif /* CONFIG_HAVE_MEMBLOCK */ 249#endif /* CONFIG_HAVE_MEMBLOCK */
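None of the hunks shown here use the new reverse iterator yet, but it is the natural building block for top-down early allocation. The following is purely an illustrative sketch of a highest-address-first search, assuming a power-of-two align; size and align are placeholders:

	phys_addr_t start, end, found = 0;
	u64 i;

	for_each_free_mem_range_reverse(i, MAX_NUMNODES, &start, &end, NULL) {
		if (end - start < size)
			continue;			/* range too small */
		found = round_down(end - size, align);
		if (found >= start)
			break;				/* highest suitable address wins */
		found = 0;
	}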
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4baadd18f4ad..5d9b4c9813bd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1253,41 +1253,34 @@ static inline void pgtable_page_dtor(struct page *page)
1253extern void free_area_init(unsigned long * zones_size); 1253extern void free_area_init(unsigned long * zones_size);
1254extern void free_area_init_node(int nid, unsigned long * zones_size, 1254extern void free_area_init_node(int nid, unsigned long * zones_size,
1255 unsigned long zone_start_pfn, unsigned long *zholes_size); 1255 unsigned long zone_start_pfn, unsigned long *zholes_size);
1256#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 1256#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
1257/* 1257/*
1258 * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its 1258 * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
1259 * zones, allocate the backing mem_map and account for memory holes in a more 1259 * zones, allocate the backing mem_map and account for memory holes in a more
1260 * architecture independent manner. This is a substitute for creating the 1260 * architecture independent manner. This is a substitute for creating the
1261 * zone_sizes[] and zholes_size[] arrays and passing them to 1261 * zone_sizes[] and zholes_size[] arrays and passing them to
1262 * free_area_init_node() 1262 * free_area_init_node()
1263 * 1263 *
1264 * An architecture is expected to register range of page frames backed by 1264 * An architecture is expected to register range of page frames backed by
1265 * physical memory with add_active_range() before calling 1265 * physical memory with memblock_add[_node]() before calling
1266 * free_area_init_nodes() passing in the PFN each zone ends at. At a basic 1266 * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
1267 * usage, an architecture is expected to do something like 1267 * usage, an architecture is expected to do something like
1268 * 1268 *
1269 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, 1269 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
1270 * max_highmem_pfn}; 1270 * max_highmem_pfn};
1271 * for_each_valid_physical_page_range() 1271 * for_each_valid_physical_page_range()
1272 * add_active_range(node_id, start_pfn, end_pfn) 1272 * memblock_add_node(base, size, nid)
1273 * free_area_init_nodes(max_zone_pfns); 1273 * free_area_init_nodes(max_zone_pfns);
1274 * 1274 *
1275 * If the architecture guarantees that there are no holes in the ranges 1275 * free_bootmem_with_active_regions() calls free_bootmem_node() for each
1276 * registered with add_active_range(), free_bootmem_active_regions() 1276 * registered physical page range. Similarly
1277 * will call free_bootmem_node() for each registered physical page range. 1277 * sparse_memory_present_with_active_regions() calls memory_present() for
1278 * Similarly sparse_memory_present_with_active_regions() calls 1278 * each range when SPARSEMEM is enabled.
1279 * memory_present() for each range when SPARSEMEM is enabled.
1280 * 1279 *
1281 * See mm/page_alloc.c for more information on each function exposed by 1280 * See mm/page_alloc.c for more information on each function exposed by
1282 * CONFIG_ARCH_POPULATES_NODE_MAP 1281 * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
1283 */ 1282 */
1284extern void free_area_init_nodes(unsigned long *max_zone_pfn); 1283extern void free_area_init_nodes(unsigned long *max_zone_pfn);
1285extern void add_active_range(unsigned int nid, unsigned long start_pfn,
1286 unsigned long end_pfn);
1287extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
1288 unsigned long end_pfn);
1289extern void remove_all_active_ranges(void);
1290void sort_node_map(void);
1291unsigned long node_map_pfn_alignment(void); 1284unsigned long node_map_pfn_alignment(void);
1292unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, 1285unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
1293 unsigned long end_pfn); 1286 unsigned long end_pfn);
@@ -1300,14 +1293,11 @@ extern void free_bootmem_with_active_regions(int nid,
1300 unsigned long max_low_pfn); 1293 unsigned long max_low_pfn);
1301int add_from_early_node_map(struct range *range, int az, 1294int add_from_early_node_map(struct range *range, int az,
1302 int nr_range, int nid); 1295 int nr_range, int nid);
1303u64 __init find_memory_core_early(int nid, u64 size, u64 align,
1304 u64 goal, u64 limit);
1305typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1306extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1307extern void sparse_memory_present_with_active_regions(int nid); 1296extern void sparse_memory_present_with_active_regions(int nid);
1308#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
1309 1297
1310#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ 1298#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
1299
1300#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
1311 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) 1301 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
1312static inline int __early_pfn_to_nid(unsigned long pfn) 1302static inline int __early_pfn_to_nid(unsigned long pfn)
1313{ 1303{
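The rewritten mm.h comment above boils down to a two-step contract: register RAM (with node ids) through memblock, then hand the per-zone PFN ceilings to free_area_init_nodes(). A rough sketch under invented addresses and a two-node layout; the function name and the zone split are illustrative only.

        #include <linux/memblock.h>
        #include <linux/mm.h>

        static void __init example_register_ram(void)
        {
                unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

                /* step 1: describe RAM, tagging each range with its node */
                memblock_add_node(0x00000000, 0x40000000, 0);  /* 1G on node 0 */
                memblock_add_node(0x80000000, 0x40000000, 1);  /* 1G on node 1 */

                /* step 2: give the core the highest PFN usable by each zone */
                max_zone_pfns[ZONE_NORMAL] = 0xc0000000 >> PAGE_SHIFT;
                free_area_init_nodes(max_zone_pfns);
        }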
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 188cb2ffe8db..3ac040f19369 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -598,13 +598,13 @@ struct zonelist {
598#endif 598#endif
599}; 599};
600 600
601#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 601#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
602struct node_active_region { 602struct node_active_region {
603 unsigned long start_pfn; 603 unsigned long start_pfn;
604 unsigned long end_pfn; 604 unsigned long end_pfn;
605 int nid; 605 int nid;
606}; 606};
607#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 607#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
608 608
609#ifndef CONFIG_DISCONTIGMEM 609#ifndef CONFIG_DISCONTIGMEM
610/* The array of struct pages - for discontigmem use pgdat->lmem_map */ 610/* The array of struct pages - for discontigmem use pgdat->lmem_map */
@@ -720,7 +720,7 @@ extern int movable_zone;
720 720
721static inline int zone_movable_is_highmem(void) 721static inline int zone_movable_is_highmem(void)
722{ 722{
723#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP) 723#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
724 return movable_zone == ZONE_HIGHMEM; 724 return movable_zone == ZONE_HIGHMEM;
725#else 725#else
726 return 0; 726 return 0;
@@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
938#endif 938#endif
939 939
940#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ 940#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
941 !defined(CONFIG_ARCH_POPULATES_NODE_MAP) 941 !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
942static inline unsigned long early_pfn_to_nid(unsigned long pfn) 942static inline unsigned long early_pfn_to_nid(unsigned long pfn)
943{ 943{
944 return 0; 944 return 0;
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 79159de0e341..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -40,12 +40,6 @@
40#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ 40#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
41#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ 41#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
42 42
43#ifdef CONFIG_PHYS_ADDR_T_64BIT
44#define MEMBLOCK_INACTIVE 0x3a84fb0144c9e71bULL
45#else
46#define MEMBLOCK_INACTIVE 0x44c9e71bUL
47#endif
48
49#define SLUB_RED_INACTIVE 0xbb 43#define SLUB_RED_INACTIVE 0xbb
50#define SLUB_RED_ACTIVE 0xcc 44#define SLUB_RED_ACTIVE 0xcc
51 45
diff --git a/kernel/printk.c b/kernel/printk.c
index 7982a0a841ea..afc8310c4625 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -199,7 +199,7 @@ void __init setup_log_buf(int early)
199 unsigned long mem; 199 unsigned long mem;
200 200
201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); 201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
202 if (mem == MEMBLOCK_ERROR) 202 if (!mem)
203 return; 203 return;
204 new_log_buf = __va(mem); 204 new_log_buf = __va(mem);
205 } else { 205 } else {
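The printk.c hunk shows the new failure convention: memblock_alloc() reports failure as 0 rather than MEMBLOCK_ERROR, so callers test the returned address directly. A small sketch of the same pattern; the helper is hypothetical.

        #include <linux/memblock.h>
        #include <linux/mm.h>

        static void * __init example_early_buffer(unsigned long len)
        {
                unsigned long mem = memblock_alloc(len, PAGE_SIZE);

                if (!mem)               /* 0 now means "no memory" */
                        return NULL;
                return __va(mem);       /* safe: the first page is never handed out */
        }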
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c8..e338407f1225 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
131config HAVE_MEMBLOCK 131config HAVE_MEMBLOCK
132 boolean 132 boolean
133 133
134config HAVE_MEMBLOCK_NODE_MAP
135 boolean
136
137config ARCH_DISCARD_MEMBLOCK
138 boolean
139
134config NO_BOOTMEM 140config NO_BOOTMEM
135 boolean 141 boolean
136 142
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed5..2f55f19b7c86 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
20#include <linux/seq_file.h> 20#include <linux/seq_file.h>
21#include <linux/memblock.h> 21#include <linux/memblock.h>
22 22
23struct memblock memblock __initdata_memblock; 23static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
24static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
25
26struct memblock memblock __initdata_memblock = {
27 .memory.regions = memblock_memory_init_regions,
28 .memory.cnt = 1, /* empty dummy entry */
29 .memory.max = INIT_MEMBLOCK_REGIONS,
30
31 .reserved.regions = memblock_reserved_init_regions,
32 .reserved.cnt = 1, /* empty dummy entry */
33 .reserved.max = INIT_MEMBLOCK_REGIONS,
34
35 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
36};
24 37
25int memblock_debug __initdata_memblock; 38int memblock_debug __initdata_memblock;
26int memblock_can_resize __initdata_memblock; 39static int memblock_can_resize __initdata_memblock;
27static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
28static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
29 40
30/* inline so we don't get a warning when pr_debug is compiled out */ 41/* inline so we don't get a warning when pr_debug is compiled out */
31static inline const char *memblock_type_name(struct memblock_type *type) 42static inline const char *memblock_type_name(struct memblock_type *type)
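With the statically initialized memblock above (one empty dummy region per type and current_limit preset), there is no memblock_init() call to make any more; boot code can start adding ranges immediately and only has to announce when resizing becomes allowed. A minimal sketch with made-up addresses:

        #include <linux/memblock.h>

        static void __init example_early_boot(void)
        {
                memblock_add(0x00000000, 0x10000000);           /* 256M of RAM  */
                memblock_reserve(0x00100000, 0x00400000);       /* kernel image */

                memblock_allow_resize();        /* replaces memblock_analyze()      */
                memblock_dump_all();            /* only prints with memblock=debug  */
        }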
@@ -38,20 +49,15 @@ static inline const char *memblock_type_name(struct memblock_type *type)
38 return "unknown"; 49 return "unknown";
39} 50}
40 51
41/* 52/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
42 * Address comparison utilities 53static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
43 */
44
45static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size)
46{
47 return addr & ~(size - 1);
48}
49
50static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size)
51{ 54{
52 return (addr + (size - 1)) & ~(size - 1); 55 return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
53} 56}
54 57
58/*
59 * Address comparison utilities
60 */
55static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, 61static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
56 phys_addr_t base2, phys_addr_t size2) 62 phys_addr_t base2, phys_addr_t size2)
57{ 63{
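memblock_cap_size() above exists so that a caller may pass an effectively unbounded size (as memblock_enforce_memory_limit() later does with ULLONG_MAX) without base + size wrapping around. A stand-alone restatement of the same clamp, shown only for illustration:

        #include <linux/types.h>
        #include <linux/kernel.h>

        /* trim *size so that base + *size cannot overflow phys_addr_t */
        static inline phys_addr_t example_cap_size(phys_addr_t base, phys_addr_t *size)
        {
                return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
        }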
@@ -73,83 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
73 return (i < type->cnt) ? i : -1; 79 return (i < type->cnt) ? i : -1;
74} 80}
75 81
76/* 82/**
77 * Find, allocate, deallocate or reserve unreserved regions. All allocations 83 * memblock_find_in_range_node - find free area in given range and node
78 * are top-down. 84 * @start: start of candidate range
85 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
86 * @size: size of free area to find
87 * @align: alignment of free area to find
88 * @nid: nid of the free area to find, %MAX_NUMNODES for any node
89 *
90 * Find @size free area aligned to @align in the specified range and node.
91 *
92 * RETURNS:
93 * Found address on success, %0 on failure.
79 */ 94 */
80 95phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
81static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, 96 phys_addr_t end, phys_addr_t size,
82 phys_addr_t size, phys_addr_t align) 97 phys_addr_t align, int nid)
83{ 98{
84 phys_addr_t base, res_base; 99 phys_addr_t this_start, this_end, cand;
85 long j; 100 u64 i;
86
87 /* In case, huge size is requested */
88 if (end < size)
89 return MEMBLOCK_ERROR;
90
91 base = memblock_align_down((end - size), align);
92 101
93 /* Prevent allocations returning 0 as it's also used to 102 /* align @size to avoid excessive fragmentation on reserved array */
94 * indicate an allocation failure 103 size = round_up(size, align);
95 */
96 if (start == 0)
97 start = PAGE_SIZE;
98
99 while (start <= base) {
100 j = memblock_overlaps_region(&memblock.reserved, base, size);
101 if (j < 0)
102 return base;
103 res_base = memblock.reserved.regions[j].base;
104 if (res_base < size)
105 break;
106 base = memblock_align_down(res_base - size, align);
107 }
108 104
109 return MEMBLOCK_ERROR; 105 /* pump up @end */
110}
111
112static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
113 phys_addr_t align, phys_addr_t start, phys_addr_t end)
114{
115 long i;
116
117 BUG_ON(0 == size);
118
119 /* Pump up max_addr */
120 if (end == MEMBLOCK_ALLOC_ACCESSIBLE) 106 if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
121 end = memblock.current_limit; 107 end = memblock.current_limit;
122 108
123 /* We do a top-down search, this tends to limit memory 109 /* adjust @start to avoid underflow and allocating the first page */
124 * fragmentation by keeping early boot allocs near the 110 start = max3(start, size, (phys_addr_t)PAGE_SIZE);
125 * top of memory 111 end = max(start, end);
126 */
127 for (i = memblock.memory.cnt - 1; i >= 0; i--) {
128 phys_addr_t memblockbase = memblock.memory.regions[i].base;
129 phys_addr_t memblocksize = memblock.memory.regions[i].size;
130 phys_addr_t bottom, top, found;
131 112
132 if (memblocksize < size) 113 for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
133 continue; 114 this_start = clamp(this_start, start, end);
134 if ((memblockbase + memblocksize) <= start) 115 this_end = clamp(this_end, start, end);
135 break; 116
136 bottom = max(memblockbase, start); 117 cand = round_down(this_end - size, align);
137 top = min(memblockbase + memblocksize, end); 118 if (cand >= this_start)
138 if (bottom >= top) 119 return cand;
139 continue;
140 found = memblock_find_region(bottom, top, size, align);
141 if (found != MEMBLOCK_ERROR)
142 return found;
143 } 120 }
144 return MEMBLOCK_ERROR; 121 return 0;
145} 122}
146 123
147/* 124/**
148 * Find a free area with specified alignment in a specific range. 125 * memblock_find_in_range - find free area in given range
126 * @start: start of candidate range
127 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
128 * @size: size of free area to find
129 * @align: alignment of free area to find
130 *
131 * Find @size free area aligned to @align in the specified range.
132 *
133 * RETURNS:
134 * Found address on success, %0 on failure.
149 */ 135 */
150u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) 136phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
137 phys_addr_t end, phys_addr_t size,
138 phys_addr_t align)
151{ 139{
152 return memblock_find_base(size, align, start, end); 140 return memblock_find_in_range_node(start, end, size, align,
141 MAX_NUMNODES);
153} 142}
154 143
155/* 144/*
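memblock_find_in_range() above only locates a candidate block (top-down, within [start, end)); nothing is reserved until the caller says so. The usual find-then-reserve pattern, sketched with an arbitrary below-16M window and a hypothetical helper name:

        #include <linux/memblock.h>

        static phys_addr_t __init example_grab_low_block(phys_addr_t size)
        {
                phys_addr_t addr;

                /* search [1M, 16M) for a block aligned to its own size */
                addr = memblock_find_in_range(0x100000, 0x1000000, size, size);
                if (!addr)
                        return 0;               /* 0 is the failure value now */

                memblock_reserve(addr, size);   /* claim it before anyone else can */
                return addr;
        }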
@@ -178,25 +167,21 @@ int __init_memblock memblock_reserve_reserved_regions(void)
178 167
179static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 168static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
180{ 169{
181 unsigned long i; 170 type->total_size -= type->regions[r].size;
182 171 memmove(&type->regions[r], &type->regions[r + 1],
183 for (i = r; i < type->cnt - 1; i++) { 172 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
184 type->regions[i].base = type->regions[i + 1].base;
185 type->regions[i].size = type->regions[i + 1].size;
186 }
187 type->cnt--; 173 type->cnt--;
188 174
189 /* Special case for empty arrays */ 175 /* Special case for empty arrays */
190 if (type->cnt == 0) { 176 if (type->cnt == 0) {
177 WARN_ON(type->total_size != 0);
191 type->cnt = 1; 178 type->cnt = 1;
192 type->regions[0].base = 0; 179 type->regions[0].base = 0;
193 type->regions[0].size = 0; 180 type->regions[0].size = 0;
181 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
194 } 182 }
195} 183}
196 184
197/* Defined below but needed now */
198static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
199
200static int __init_memblock memblock_double_array(struct memblock_type *type) 185static int __init_memblock memblock_double_array(struct memblock_type *type)
201{ 186{
202 struct memblock_region *new_array, *old_array; 187 struct memblock_region *new_array, *old_array;
@@ -226,10 +211,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
226 */ 211 */
227 if (use_slab) { 212 if (use_slab) {
228 new_array = kmalloc(new_size, GFP_KERNEL); 213 new_array = kmalloc(new_size, GFP_KERNEL);
229 addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); 214 addr = new_array ? __pa(new_array) : 0;
230 } else 215 } else
231 addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); 216 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
232 if (addr == MEMBLOCK_ERROR) { 217 if (!addr) {
233 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 218 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
234 memblock_type_name(type), type->max, type->max * 2); 219 memblock_type_name(type), type->max, type->max * 2);
235 return -1; 220 return -1;
@@ -254,7 +239,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
254 return 0; 239 return 0;
255 240
256 /* Add the new reserved region now. Should not fail ! */ 241 /* Add the new reserved region now. Should not fail ! */
257 BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size)); 242 BUG_ON(memblock_reserve(addr, new_size));
258 243
259 /* If the array wasn't our static init one, then free it. We only do 244 /* If the array wasn't our static init one, then free it. We only do
260 * that before SLAB is available as later on, we don't know whether 245 * that before SLAB is available as later on, we don't know whether
@@ -268,343 +253,514 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
268 return 0; 253 return 0;
269} 254}
270 255
271int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, 256/**
272 phys_addr_t addr2, phys_addr_t size2) 257 * memblock_merge_regions - merge neighboring compatible regions
273{ 258 * @type: memblock type to scan
274 return 1; 259 *
275} 260 * Scan @type and merge neighboring compatible regions.
276 261 */
277static long __init_memblock memblock_add_region(struct memblock_type *type, 262static void __init_memblock memblock_merge_regions(struct memblock_type *type)
278 phys_addr_t base, phys_addr_t size)
279{ 263{
280 phys_addr_t end = base + size; 264 int i = 0;
281 int i, slot = -1;
282
283 /* First try and coalesce this MEMBLOCK with others */
284 for (i = 0; i < type->cnt; i++) {
285 struct memblock_region *rgn = &type->regions[i];
286 phys_addr_t rend = rgn->base + rgn->size;
287 265
288 /* Exit if there's no possible hits */ 266 /* cnt never goes below 1 */
289 if (rgn->base > end || rgn->size == 0) 267 while (i < type->cnt - 1) {
290 break; 268 struct memblock_region *this = &type->regions[i];
269 struct memblock_region *next = &type->regions[i + 1];
291 270
292 /* Check if we are fully enclosed within an existing 271 if (this->base + this->size != next->base ||
293 * block 272 memblock_get_region_node(this) !=
294 */ 273 memblock_get_region_node(next)) {
295 if (rgn->base <= base && rend >= end) 274 BUG_ON(this->base + this->size > next->base);
296 return 0; 275 i++;
276 continue;
277 }
297 278
298 /* Check if we overlap or are adjacent with the bottom 279 this->size += next->size;
299 * of a block. 280 memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next));
300 */ 281 type->cnt--;
301 if (base < rgn->base && end >= rgn->base) { 282 }
302 /* If we can't coalesce, create a new block */ 283}
303 if (!memblock_memory_can_coalesce(base, size,
304 rgn->base,
305 rgn->size)) {
306 /* Overlap & can't coalesce are mutually
307 * exclusive, if you do that, be prepared
308 * for trouble
309 */
310 WARN_ON(end != rgn->base);
311 goto new_block;
312 }
313 /* We extend the bottom of the block down to our
314 * base
315 */
316 rgn->base = base;
317 rgn->size = rend - base;
318 284
319 /* Return if we have nothing else to allocate 285/**
320 * (fully coalesced) 286 * memblock_insert_region - insert new memblock region
321 */ 287 * @type: memblock type to insert into
322 if (rend >= end) 288 * @idx: index for the insertion point
323 return 0; 289 * @base: base address of the new region
290 * @size: size of the new region
291 *
292 * Insert new memblock region [@base,@base+@size) into @type at @idx.
293 * @type must already have extra room to accommodate the new region.
294 */
295static void __init_memblock memblock_insert_region(struct memblock_type *type,
296 int idx, phys_addr_t base,
297 phys_addr_t size, int nid)
298{
299 struct memblock_region *rgn = &type->regions[idx];
324 300
325 /* We continue processing from the end of the 301 BUG_ON(type->cnt >= type->max);
326 * coalesced block. 302 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
327 */ 303 rgn->base = base;
328 base = rend; 304 rgn->size = size;
329 size = end - base; 305 memblock_set_region_node(rgn, nid);
330 } 306 type->cnt++;
307 type->total_size += size;
308}
331 309
332 /* Now check if we overlap or are adjacent with the 310/**
333 * top of a block 311 * memblock_add_region - add new memblock region
334 */ 312 * @type: memblock type to add new region into
335 if (base <= rend && end >= rend) { 313 * @base: base address of the new region
336 /* If we can't coalesce, create a new block */ 314 * @size: size of the new region
337 if (!memblock_memory_can_coalesce(rgn->base, 315 * @nid: nid of the new region
338 rgn->size, 316 *
339 base, size)) { 317 * Add new memblock region [@base,@base+@size) into @type. The new region
340 /* Overlap & can't coalesce are mutually 318 * is allowed to overlap with existing ones - overlaps don't affect already
341 * exclusive, if you do that, be prepared 319 * existing regions. @type is guaranteed to be minimal (all neighbouring
342 * for trouble 320 * compatible regions are merged) after the addition.
343 */ 321 *
344 WARN_ON(rend != base); 322 * RETURNS:
345 goto new_block; 323 * 0 on success, -errno on failure.
346 } 324 */
347 /* We adjust our base down to enclose the 325static int __init_memblock memblock_add_region(struct memblock_type *type,
348 * original block and destroy it. It will be 326 phys_addr_t base, phys_addr_t size, int nid)
349 * part of our new allocation. Since we've 327{
350 * freed an entry, we know we won't fail 328 bool insert = false;
351 * to allocate one later, so we won't risk 329 phys_addr_t obase = base;
352 * losing the original block allocation. 330 phys_addr_t end = base + memblock_cap_size(base, &size);
353 */ 331 int i, nr_new;
354 size += (base - rgn->base);
355 base = rgn->base;
356 memblock_remove_region(type, i--);
357 }
358 }
359 332
360 /* If the array is empty, special case, replace the fake 333 /* special case for empty array */
361 * filler region and return 334 if (type->regions[0].size == 0) {
362 */ 335 WARN_ON(type->cnt != 1 || type->total_size);
363 if ((type->cnt == 1) && (type->regions[0].size == 0)) {
364 type->regions[0].base = base; 336 type->regions[0].base = base;
365 type->regions[0].size = size; 337 type->regions[0].size = size;
338 memblock_set_region_node(&type->regions[0], nid);
339 type->total_size = size;
366 return 0; 340 return 0;
367 } 341 }
368 342repeat:
369 new_block: 343 /*
370 /* If we are out of space, we fail. It's too late to resize the array 344 * The following is executed twice. Once with %false @insert and
371 * but then this shouldn't have happened in the first place. 345 * then with %true. The first counts the number of regions needed
346 * to accomodate the new area. The second actually inserts them.
372 */ 347 */
373 if (WARN_ON(type->cnt >= type->max)) 348 base = obase;
374 return -1; 349 nr_new = 0;
375 350
376 /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ 351 for (i = 0; i < type->cnt; i++) {
377 for (i = type->cnt - 1; i >= 0; i--) { 352 struct memblock_region *rgn = &type->regions[i];
378 if (base < type->regions[i].base) { 353 phys_addr_t rbase = rgn->base;
379 type->regions[i+1].base = type->regions[i].base; 354 phys_addr_t rend = rbase + rgn->size;
380 type->regions[i+1].size = type->regions[i].size; 355
381 } else { 356 if (rbase >= end)
382 type->regions[i+1].base = base;
383 type->regions[i+1].size = size;
384 slot = i + 1;
385 break; 357 break;
358 if (rend <= base)
359 continue;
360 /*
361 * @rgn overlaps. If it separates the lower part of new
362 * area, insert that portion.
363 */
364 if (rbase > base) {
365 nr_new++;
366 if (insert)
367 memblock_insert_region(type, i++, base,
368 rbase - base, nid);
386 } 369 }
370 /* area below @rend is dealt with, forget about it */
371 base = min(rend, end);
387 } 372 }
388 if (base < type->regions[0].base) { 373
389 type->regions[0].base = base; 374 /* insert the remaining portion */
390 type->regions[0].size = size; 375 if (base < end) {
391 slot = 0; 376 nr_new++;
377 if (insert)
378 memblock_insert_region(type, i, base, end - base, nid);
392 } 379 }
393 type->cnt++;
394 380
395 /* The array is full ? Try to resize it. If that fails, we undo 381 /*
396 * our allocation and return an error 382 * If this was the first round, resize array and repeat for actual
383 * insertions; otherwise, merge and return.
397 */ 384 */
398 if (type->cnt == type->max && memblock_double_array(type)) { 385 if (!insert) {
399 BUG_ON(slot < 0); 386 while (type->cnt + nr_new > type->max)
400 memblock_remove_region(type, slot); 387 if (memblock_double_array(type) < 0)
401 return -1; 388 return -ENOMEM;
389 insert = true;
390 goto repeat;
391 } else {
392 memblock_merge_regions(type);
393 return 0;
402 } 394 }
403
404 return 0;
405} 395}
406 396
407long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 397int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
398 int nid)
408{ 399{
409 return memblock_add_region(&memblock.memory, base, size); 400 return memblock_add_region(&memblock.memory, base, size, nid);
401}
410 402
403int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
404{
405 return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
411} 406}
412 407
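memblock_add_region() above runs its scan twice: the first pass only counts how many slots the new area needs (growing the array if required), the second pass inserts, and memblock_merge_regions() then folds neighbouring compatible regions back together. One practical consequence, sketched with invented ranges, is that overlapping registrations are harmless:

        #include <linux/memblock.h>

        static void __init example_overlapping_add(void)
        {
                memblock_add(0x00000000, 0x08000000);   /* [0, 128M)             */
                memblock_add(0x04000000, 0x08000000);   /* [64M, 192M), overlaps */

                /* memblock.memory now holds a single merged region [0, 192M) */
        }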
413static long __init_memblock __memblock_remove(struct memblock_type *type, 408/**
414 phys_addr_t base, phys_addr_t size) 409 * memblock_isolate_range - isolate given range into disjoint memblocks
410 * @type: memblock type to isolate range for
411 * @base: base of range to isolate
412 * @size: size of range to isolate
413 * @start_rgn: out parameter for the start of isolated region
414 * @end_rgn: out parameter for the end of isolated region
415 *
416 * Walk @type and ensure that regions don't cross the boundaries defined by
417 * [@base,@base+@size). Crossing regions are split at the boundaries,
418 * which may create at most two more regions. The index of the first
419 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
420 *
421 * RETURNS:
422 * 0 on success, -errno on failure.
423 */
424static int __init_memblock memblock_isolate_range(struct memblock_type *type,
425 phys_addr_t base, phys_addr_t size,
426 int *start_rgn, int *end_rgn)
415{ 427{
416 phys_addr_t end = base + size; 428 phys_addr_t end = base + memblock_cap_size(base, &size);
417 int i; 429 int i;
418 430
419 /* Walk through the array for collisions */ 431 *start_rgn = *end_rgn = 0;
432
433 /* we'll create at most two more regions */
434 while (type->cnt + 2 > type->max)
435 if (memblock_double_array(type) < 0)
436 return -ENOMEM;
437
420 for (i = 0; i < type->cnt; i++) { 438 for (i = 0; i < type->cnt; i++) {
421 struct memblock_region *rgn = &type->regions[i]; 439 struct memblock_region *rgn = &type->regions[i];
422 phys_addr_t rend = rgn->base + rgn->size; 440 phys_addr_t rbase = rgn->base;
441 phys_addr_t rend = rbase + rgn->size;
423 442
424 /* Nothing more to do, exit */ 443 if (rbase >= end)
425 if (rgn->base > end || rgn->size == 0)
426 break; 444 break;
427 445 if (rend <= base)
428 /* If we fully enclose the block, drop it */
429 if (base <= rgn->base && end >= rend) {
430 memblock_remove_region(type, i--);
431 continue; 446 continue;
432 }
433 447
434 /* If we are fully enclosed within a block 448 if (rbase < base) {
435 * then we need to split it and we are done 449 /*
436 */ 450 * @rgn intersects from below. Split and continue
437 if (base > rgn->base && end < rend) { 451 * to process the next region - the new top half.
438 rgn->size = base - rgn->base; 452 */
439 if (!memblock_add_region(type, end, rend - end)) 453 rgn->base = base;
440 return 0; 454 rgn->size -= base - rbase;
441 /* Failure to split is bad, we at least 455 type->total_size -= base - rbase;
442 * restore the block before erroring 456 memblock_insert_region(type, i, rbase, base - rbase,
457 memblock_get_region_node(rgn));
458 } else if (rend > end) {
459 /*
460 * @rgn intersects from above. Split and redo the
461 * current region - the new bottom half.
443 */ 462 */
444 rgn->size = rend - rgn->base;
445 WARN_ON(1);
446 return -1;
447 }
448
449 /* Check if we need to trim the bottom of a block */
450 if (rgn->base < end && rend > end) {
451 rgn->size -= end - rgn->base;
452 rgn->base = end; 463 rgn->base = end;
453 break; 464 rgn->size -= end - rbase;
465 type->total_size -= end - rbase;
466 memblock_insert_region(type, i--, rbase, end - rbase,
467 memblock_get_region_node(rgn));
468 } else {
469 /* @rgn is fully contained, record it */
470 if (!*end_rgn)
471 *start_rgn = i;
472 *end_rgn = i + 1;
454 } 473 }
474 }
455 475
456 /* And check if we need to trim the top of a block */ 476 return 0;
457 if (base < rend) 477}
458 rgn->size -= rend - base;
459 478
460 } 479static int __init_memblock __memblock_remove(struct memblock_type *type,
480 phys_addr_t base, phys_addr_t size)
481{
482 int start_rgn, end_rgn;
483 int i, ret;
484
485 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
486 if (ret)
487 return ret;
488
489 for (i = end_rgn - 1; i >= start_rgn; i--)
490 memblock_remove_region(type, i);
461 return 0; 491 return 0;
462} 492}
463 493
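__memblock_remove() above is now just memblock_isolate_range() followed by dropping the isolated entries, so a removal that lands in the middle of a region simply splits it. A sketch with made-up addresses:

        #include <linux/memblock.h>

        static void __init example_punch_hole(void)
        {
                memblock_add(0x00000000, 0x10000000);   /* [0, 256M) */

                /* drop [64M, 128M): the region splits into [0, 64M) and [128M, 256M) */
                memblock_remove(0x04000000, 0x04000000);
        }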
464long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) 494int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
465{ 495{
466 return __memblock_remove(&memblock.memory, base, size); 496 return __memblock_remove(&memblock.memory, base, size);
467} 497}
468 498
469long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) 499int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
470{ 500{
501 memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n",
502 (unsigned long long)base,
503 (unsigned long long)base + size,
504 (void *)_RET_IP_);
505
471 return __memblock_remove(&memblock.reserved, base, size); 506 return __memblock_remove(&memblock.reserved, base, size);
472} 507}
473 508
474long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) 509int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
475{ 510{
476 struct memblock_type *_rgn = &memblock.reserved; 511 struct memblock_type *_rgn = &memblock.reserved;
477 512
513 memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
514 (unsigned long long)base,
515 (unsigned long long)base + size,
516 (void *)_RET_IP_);
478 BUG_ON(0 == size); 517 BUG_ON(0 == size);
479 518
480 return memblock_add_region(_rgn, base, size); 519 return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
481} 520}
482 521
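memblock_reserve() and memblock_free() now return 0 or -errno and log through memblock_dbg(), so a temporary carve-out can be taken and handed back symmetrically. A small sketch; the addresses and the scenario are illustrative only:

        #include <linux/memblock.h>

        static void __init example_temporary_carveout(void)
        {
                phys_addr_t base = 0x01000000;  /* 16M */
                phys_addr_t size = 0x00200000;  /* 2M  */

                if (memblock_reserve(base, size))
                        return;                 /* -errno on failure */

                /* ... probe hardware, decide the buffer is not needed ... */

                memblock_free(base, size);      /* both calls show up with memblock=debug */
        }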
483phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 522/**
523 * __next_free_mem_range - next function for for_each_free_mem_range()
524 * @idx: pointer to u64 loop variable
525 * @nid: node selector, %MAX_NUMNODES for all nodes
526 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
527 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
528 * @out_nid: ptr to int for nid of the range, can be %NULL
529 *
530 * Find the first free area from *@idx which matches @nid, fill the out
531 * parameters, and update *@idx for the next iteration. The lower 32bit of
532 * *@idx contains index into memory region and the upper 32bit indexes the
533 * areas before each reserved region. For example, if reserved regions
534 * look like the following,
535 *
536 * 0:[0-16), 1:[32-48), 2:[128-130)
537 *
538 * The upper 32bit indexes the following regions.
539 *
540 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
541 *
542 * As both region arrays are sorted, the function advances the two indices
543 * in lockstep and returns each intersection.
544 */
545void __init_memblock __next_free_mem_range(u64 *idx, int nid,
546 phys_addr_t *out_start,
547 phys_addr_t *out_end, int *out_nid)
484{ 548{
485 phys_addr_t found; 549 struct memblock_type *mem = &memblock.memory;
550 struct memblock_type *rsv = &memblock.reserved;
551 int mi = *idx & 0xffffffff;
552 int ri = *idx >> 32;
486 553
487 /* We align the size to limit fragmentation. Without this, a lot of 554 for ( ; mi < mem->cnt; mi++) {
488 * small allocs quickly eat up the whole reserve array on sparc 555 struct memblock_region *m = &mem->regions[mi];
489 */ 556 phys_addr_t m_start = m->base;
490 size = memblock_align_up(size, align); 557 phys_addr_t m_end = m->base + m->size;
491 558
492 found = memblock_find_base(size, align, 0, max_addr); 559 /* only memory regions are associated with nodes, check it */
493 if (found != MEMBLOCK_ERROR && 560 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
494 !memblock_add_region(&memblock.reserved, found, size)) 561 continue;
495 return found;
496 562
497 return 0; 563 /* scan areas before each reservation for intersection */
564 for ( ; ri < rsv->cnt + 1; ri++) {
565 struct memblock_region *r = &rsv->regions[ri];
566 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
567 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
568
569 /* if ri advanced past mi, break out to advance mi */
570 if (r_start >= m_end)
571 break;
572 /* if the two regions intersect, we're done */
573 if (m_start < r_end) {
574 if (out_start)
575 *out_start = max(m_start, r_start);
576 if (out_end)
577 *out_end = min(m_end, r_end);
578 if (out_nid)
579 *out_nid = memblock_get_region_node(m);
580 /*
581 * The region which ends first is advanced
582 * for the next iteration.
583 */
584 if (m_end <= r_end)
585 mi++;
586 else
587 ri++;
588 *idx = (u32)mi | (u64)ri << 32;
589 return;
590 }
591 }
592 }
593
594 /* signal end of iteration */
595 *idx = ULLONG_MAX;
498} 596}
499 597
500phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 598/**
599 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
600 * @idx: pointer to u64 loop variable
601 * @nid: node selector, %MAX_NUMNODES for all nodes
602 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
603 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
604 * @out_nid: ptr to int for nid of the range, can be %NULL
605 *
606 * Reverse of __next_free_mem_range().
607 */
608void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
609 phys_addr_t *out_start,
610 phys_addr_t *out_end, int *out_nid)
501{ 611{
502 phys_addr_t alloc; 612 struct memblock_type *mem = &memblock.memory;
613 struct memblock_type *rsv = &memblock.reserved;
614 int mi = *idx & 0xffffffff;
615 int ri = *idx >> 32;
503 616
504 alloc = __memblock_alloc_base(size, align, max_addr); 617 if (*idx == (u64)ULLONG_MAX) {
618 mi = mem->cnt - 1;
619 ri = rsv->cnt;
620 }
505 621
506 if (alloc == 0) 622 for ( ; mi >= 0; mi--) {
507 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", 623 struct memblock_region *m = &mem->regions[mi];
508 (unsigned long long) size, (unsigned long long) max_addr); 624 phys_addr_t m_start = m->base;
625 phys_addr_t m_end = m->base + m->size;
509 626
510 return alloc; 627 /* only memory regions are associated with nodes, check it */
511} 628 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
629 continue;
512 630
513phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) 631 /* scan areas before each reservation for intersection */
514{ 632 for ( ; ri >= 0; ri--) {
515 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 633 struct memblock_region *r = &rsv->regions[ri];
516} 634 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
635 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
636
637 /* if ri advanced past mi, break out to advance mi */
638 if (r_end <= m_start)
639 break;
640 /* if the two regions intersect, we're done */
641 if (m_end > r_start) {
642 if (out_start)
643 *out_start = max(m_start, r_start);
644 if (out_end)
645 *out_end = min(m_end, r_end);
646 if (out_nid)
647 *out_nid = memblock_get_region_node(m);
648
649 if (m_start >= r_start)
650 mi--;
651 else
652 ri--;
653 *idx = (u32)mi | (u64)ri << 32;
654 return;
655 }
656 }
657 }
517 658
659 *idx = ULLONG_MAX;
660}
518 661
662#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
519/* 663/*
520 * Additional node-local allocators. Search for node memory is bottom up 664 * Common iterator interface used to define for_each_mem_range().
521 * and walks memblock regions within that node bottom-up as well, but allocation
522 * within an memblock region is top-down. XXX I plan to fix that at some stage
523 *
524 * WARNING: Only available after early_node_map[] has been populated,
525 * on some architectures, that is after all the calls to add_active_range()
526 * have been done to populate it.
527 */ 665 */
528 666void __init_memblock __next_mem_pfn_range(int *idx, int nid,
529phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) 667 unsigned long *out_start_pfn,
668 unsigned long *out_end_pfn, int *out_nid)
530{ 669{
531#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 670 struct memblock_type *type = &memblock.memory;
532 /* 671 struct memblock_region *r;
533 * This code originates from sparc which really wants use to walk by addresses
534 * and returns the nid. This is not very convenient for early_pfn_map[] users
535 * as the map isn't sorted yet, and it really wants to be walked by nid.
536 *
537 * For now, I implement the inefficient method below which walks the early
538 * map multiple times. Eventually we may want to use an ARCH config option
539 * to implement a completely different method for both case.
540 */
541 unsigned long start_pfn, end_pfn;
542 int i;
543 672
544 for (i = 0; i < MAX_NUMNODES; i++) { 673 while (++*idx < type->cnt) {
545 get_pfn_range_for_nid(i, &start_pfn, &end_pfn); 674 r = &type->regions[*idx];
546 if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) 675
676 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
547 continue; 677 continue;
548 *nid = i; 678 if (nid == MAX_NUMNODES || nid == r->nid)
549 return min(end, PFN_PHYS(end_pfn)); 679 break;
680 }
681 if (*idx >= type->cnt) {
682 *idx = -1;
683 return;
550 } 684 }
551#endif
552 *nid = 0;
553 685
554 return end; 686 if (out_start_pfn)
687 *out_start_pfn = PFN_UP(r->base);
688 if (out_end_pfn)
689 *out_end_pfn = PFN_DOWN(r->base + r->size);
690 if (out_nid)
691 *out_nid = r->nid;
555} 692}
556 693
557static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, 694/**
558 phys_addr_t size, 695 * memblock_set_node - set node ID on memblock regions
559 phys_addr_t align, int nid) 696 * @base: base of area to set node ID for
697 * @size: size of area to set node ID for
698 * @nid: node ID to set
699 *
700 * Set the nid of memblock memory regions in [@base,@base+@size) to @nid.
701 * Regions which cross the area boundaries are split as necessary.
702 *
703 * RETURNS:
704 * 0 on success, -errno on failure.
705 */
706int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
707 int nid)
560{ 708{
561 phys_addr_t start, end; 709 struct memblock_type *type = &memblock.memory;
710 int start_rgn, end_rgn;
711 int i, ret;
562 712
563 start = mp->base; 713 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
564 end = start + mp->size; 714 if (ret)
715 return ret;
565 716
566 start = memblock_align_up(start, align); 717 for (i = start_rgn; i < end_rgn; i++)
567 while (start < end) { 718 type->regions[i].nid = nid;
568 phys_addr_t this_end;
569 int this_nid;
570 719
571 this_end = memblock_nid_range(start, end, &this_nid); 720 memblock_merge_regions(type);
572 if (this_nid == nid) { 721 return 0;
573 phys_addr_t ret = memblock_find_region(start, this_end, size, align); 722}
574 if (ret != MEMBLOCK_ERROR && 723#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
575 !memblock_add_region(&memblock.reserved, ret, size)) 724
576 return ret; 725static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
577 } 726 phys_addr_t align, phys_addr_t max_addr,
578 start = this_end; 727 int nid)
579 } 728{
729 phys_addr_t found;
580 730
581 return MEMBLOCK_ERROR; 731 found = memblock_find_in_range_node(0, max_addr, size, align, nid);
732 if (found && !memblock_reserve(found, size))
733 return found;
734
735 return 0;
582} 736}
583 737
584phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) 738phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
585{ 739{
586 struct memblock_type *mem = &memblock.memory; 740 return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
587 int i; 741}
588 742
589 BUG_ON(0 == size); 743phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
744{
745 return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
746}
590 747
591 /* We align the size to limit fragmentation. Without this, a lot of 748phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
592 * small allocs quickly eat up the whole reserve array on sparc 749{
593 */ 750 phys_addr_t alloc;
594 size = memblock_align_up(size, align);
595 751
596 /* We do a bottom-up search for a region with the right 752 alloc = __memblock_alloc_base(size, align, max_addr);
597 * nid since that's easier considering how memblock_nid_range()
598 * works
599 */
600 for (i = 0; i < mem->cnt; i++) {
601 phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
602 size, align, nid);
603 if (ret != MEMBLOCK_ERROR)
604 return ret;
605 }
606 753
607 return 0; 754 if (alloc == 0)
755 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
756 (unsigned long long) size, (unsigned long long) max_addr);
757
758 return alloc;
759}
760
761phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
762{
763 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
608} 764}
609 765
610phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 766phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
@@ -613,7 +769,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
613 769
614 if (res) 770 if (res)
615 return res; 771 return res;
616 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); 772 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
617} 773}
618 774
619 775
@@ -621,10 +777,9 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
621 * Remaining API functions 777 * Remaining API functions
622 */ 778 */
623 779
624/* You must call memblock_analyze() before this. */
625phys_addr_t __init memblock_phys_mem_size(void) 780phys_addr_t __init memblock_phys_mem_size(void)
626{ 781{
627 return memblock.memory_size; 782 return memblock.memory.total_size;
628} 783}
629 784
630/* lowest address */ 785/* lowest address */
@@ -640,45 +795,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
640 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 795 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
641} 796}
642 797
643/* You must call memblock_analyze() after this. */ 798void __init memblock_enforce_memory_limit(phys_addr_t limit)
644void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
645{ 799{
646 unsigned long i; 800 unsigned long i;
647 phys_addr_t limit; 801 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
648 struct memblock_region *p;
649 802
650 if (!memory_limit) 803 if (!limit)
651 return; 804 return;
652 805
653 /* Truncate the memblock regions to satisfy the memory limit. */ 806 /* find out max address */
654 limit = memory_limit;
655 for (i = 0; i < memblock.memory.cnt; i++) { 807 for (i = 0; i < memblock.memory.cnt; i++) {
656 if (limit > memblock.memory.regions[i].size) { 808 struct memblock_region *r = &memblock.memory.regions[i];
657 limit -= memblock.memory.regions[i].size;
658 continue;
659 }
660
661 memblock.memory.regions[i].size = limit;
662 memblock.memory.cnt = i + 1;
663 break;
664 }
665
666 memory_limit = memblock_end_of_DRAM();
667 809
668 /* And truncate any reserves above the limit also. */ 810 if (limit <= r->size) {
669 for (i = 0; i < memblock.reserved.cnt; i++) { 811 max_addr = r->base + limit;
670 p = &memblock.reserved.regions[i]; 812 break;
671
672 if (p->base > memory_limit)
673 p->size = 0;
674 else if ((p->base + p->size) > memory_limit)
675 p->size = memory_limit - p->base;
676
677 if (p->size == 0) {
678 memblock_remove_region(&memblock.reserved, i);
679 i--;
680 } 813 }
814 limit -= r->size;
681 } 815 }
816
817 /* truncate both memory and reserved regions */
818 __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
819 __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
682} 820}
683 821
684static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 822static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
@@ -712,16 +850,18 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
712int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 850int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
713{ 851{
714 int idx = memblock_search(&memblock.memory, base); 852 int idx = memblock_search(&memblock.memory, base);
853 phys_addr_t end = base + memblock_cap_size(base, &size);
715 854
716 if (idx == -1) 855 if (idx == -1)
717 return 0; 856 return 0;
718 return memblock.memory.regions[idx].base <= base && 857 return memblock.memory.regions[idx].base <= base &&
719 (memblock.memory.regions[idx].base + 858 (memblock.memory.regions[idx].base +
720 memblock.memory.regions[idx].size) >= (base + size); 859 memblock.memory.regions[idx].size) >= end;
721} 860}
722 861
723int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 862int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
724{ 863{
864 memblock_cap_size(base, &size);
725 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; 865 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
726} 866}
727 867
@@ -731,86 +871,45 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
731 memblock.current_limit = limit; 871 memblock.current_limit = limit;
732} 872}
733 873
734static void __init_memblock memblock_dump(struct memblock_type *region, char *name) 874static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
735{ 875{
736 unsigned long long base, size; 876 unsigned long long base, size;
737 int i; 877 int i;
738 878
739 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); 879 pr_info(" %s.cnt = 0x%lx\n", name, type->cnt);
740 880
741 for (i = 0; i < region->cnt; i++) { 881 for (i = 0; i < type->cnt; i++) {
742 base = region->regions[i].base; 882 struct memblock_region *rgn = &type->regions[i];
743 size = region->regions[i].size; 883 char nid_buf[32] = "";
744 884
745 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", 885 base = rgn->base;
746 name, i, base, base + size - 1, size); 886 size = rgn->size;
887#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
888 if (memblock_get_region_node(rgn) != MAX_NUMNODES)
889 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
890 memblock_get_region_node(rgn));
891#endif
892 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n",
893 name, i, base, base + size - 1, size, nid_buf);
747 } 894 }
748} 895}
749 896
750void __init_memblock memblock_dump_all(void) 897void __init_memblock __memblock_dump_all(void)
751{ 898{
752 if (!memblock_debug)
753 return;
754
755 pr_info("MEMBLOCK configuration:\n"); 899 pr_info("MEMBLOCK configuration:\n");
756 pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); 900 pr_info(" memory size = %#llx reserved size = %#llx\n",
901 (unsigned long long)memblock.memory.total_size,
902 (unsigned long long)memblock.reserved.total_size);
757 903
758 memblock_dump(&memblock.memory, "memory"); 904 memblock_dump(&memblock.memory, "memory");
759 memblock_dump(&memblock.reserved, "reserved"); 905 memblock_dump(&memblock.reserved, "reserved");
760} 906}
761 907
762void __init memblock_analyze(void) 908void __init memblock_allow_resize(void)
763{ 909{
764 int i;
765
766 /* Check marker in the unused last array entry */
767 WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
768 != MEMBLOCK_INACTIVE);
769 WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
770 != MEMBLOCK_INACTIVE);
771
772 memblock.memory_size = 0;
773
774 for (i = 0; i < memblock.memory.cnt; i++)
775 memblock.memory_size += memblock.memory.regions[i].size;
776
777 /* We allow resizing from there */
778 memblock_can_resize = 1; 910 memblock_can_resize = 1;
779} 911}
780 912
781void __init memblock_init(void)
782{
783 static int init_done __initdata = 0;
784
785 if (init_done)
786 return;
787 init_done = 1;
788
789 /* Hookup the initial arrays */
790 memblock.memory.regions = memblock_memory_init_regions;
791 memblock.memory.max = INIT_MEMBLOCK_REGIONS;
792 memblock.reserved.regions = memblock_reserved_init_regions;
793 memblock.reserved.max = INIT_MEMBLOCK_REGIONS;
794
795 /* Write a marker in the unused last array entry */
796 memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
797 memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
798
799 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
800 * This simplifies the memblock_add() code below...
801 */
802 memblock.memory.regions[0].base = 0;
803 memblock.memory.regions[0].size = 0;
804 memblock.memory.cnt = 1;
805
806 /* Ditto. */
807 memblock.reserved.regions[0].base = 0;
808 memblock.reserved.regions[0].size = 0;
809 memblock.reserved.cnt = 1;
810
811 memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
812}
813
814static int __init early_memblock(char *p) 913static int __init early_memblock(char *p)
815{ 914{
816 if (p && strstr(p, "debug")) 915 if (p && strstr(p, "debug"))
@@ -819,7 +918,7 @@ static int __init early_memblock(char *p)
819} 918}
820early_param("memblock", early_memblock); 919early_param("memblock", early_memblock);
821 920
822#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) 921#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
823 922
824static int memblock_debug_show(struct seq_file *m, void *private) 923static int memblock_debug_show(struct seq_file *m, void *private)
825{ 924{
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7fa41b4a07bf..24f0fc1a56d6 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -41,14 +41,13 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
41 if (limit > memblock.current_limit) 41 if (limit > memblock.current_limit)
42 limit = memblock.current_limit; 42 limit = memblock.current_limit;
43 43
44 addr = find_memory_core_early(nid, size, align, goal, limit); 44 addr = memblock_find_in_range_node(goal, limit, size, align, nid);
45 45 if (!addr)
46 if (addr == MEMBLOCK_ERROR)
47 return NULL; 46 return NULL;
48 47
49 ptr = phys_to_virt(addr); 48 ptr = phys_to_virt(addr);
50 memset(ptr, 0, size); 49 memset(ptr, 0, size);
51 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM"); 50 memblock_reserve(addr, size);
52 /* 51 /*
53 * The min_count is set to 0 so that bootmem allocated blocks 52 * The min_count is set to 0 so that bootmem allocated blocks
54 * are never reported as leaks. 53 * are never reported as leaks.
@@ -107,23 +106,27 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
107 __free_pages_bootmem(pfn_to_page(i), 0); 106 __free_pages_bootmem(pfn_to_page(i), 0);
108} 107}
109 108
110unsigned long __init free_all_memory_core_early(int nodeid) 109unsigned long __init free_low_memory_core_early(int nodeid)
111{ 110{
112 int i;
113 u64 start, end;
114 unsigned long count = 0; 111 unsigned long count = 0;
115 struct range *range = NULL; 112 phys_addr_t start, end;
116 int nr_range; 113 u64 i;
117 114
118 nr_range = get_free_all_memory_range(&range, nodeid); 115 /* free reserved array temporarily so that it's treated as free area */
119 116 memblock_free_reserved_regions();
120 for (i = 0; i < nr_range; i++) { 117
121 start = range[i].start; 118 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
122 end = range[i].end; 119 unsigned long start_pfn = PFN_UP(start);
123 count += end - start; 120 unsigned long end_pfn = min_t(unsigned long,
124 __free_pages_memory(start, end); 121 PFN_DOWN(end), max_low_pfn);
122 if (start_pfn < end_pfn) {
123 __free_pages_memory(start_pfn, end_pfn);
124 count += end_pfn - start_pfn;
125 }
125 } 126 }
126 127
128 /* put region array back? */
129 memblock_reserve_reserved_regions();
127 return count; 130 return count;
128} 131}
129 132
@@ -137,7 +140,7 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
137{ 140{
138 register_page_bootmem_info_node(pgdat); 141 register_page_bootmem_info_node(pgdat);
139 142
140 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ 143 /* free_low_memory_core_early(MAX_NUMNODES) will be called later */
141 return 0; 144 return 0;
142} 145}
143 146
@@ -155,7 +158,7 @@ unsigned long __init free_all_bootmem(void)
155 * Use MAX_NUMNODES will make sure all ranges in early_node_map[] 158 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
156 * will be used instead of only Node0 related 159 * will be used instead of only Node0 related
157 */ 160 */
158 return free_all_memory_core_early(MAX_NUMNODES); 161 return free_low_memory_core_early(MAX_NUMNODES);
159} 162}
160 163
161/** 164/**
@@ -172,7 +175,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
172 unsigned long size) 175 unsigned long size)
173{ 176{
174 kmemleak_free_part(__va(physaddr), size); 177 kmemleak_free_part(__va(physaddr), size);
175 memblock_x86_free_range(physaddr, physaddr + size); 178 memblock_free(physaddr, size);
176} 179}
177 180
178/** 181/**
@@ -187,7 +190,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
187void __init free_bootmem(unsigned long addr, unsigned long size) 190void __init free_bootmem(unsigned long addr, unsigned long size)
188{ 191{
189 kmemleak_free_part(__va(addr), size); 192 kmemleak_free_part(__va(addr), size);
190 memblock_x86_free_range(addr, addr + size); 193 memblock_free(addr, size);
191} 194}
192 195
193static void * __init ___alloc_bootmem_nopanic(unsigned long size, 196static void * __init ___alloc_bootmem_nopanic(unsigned long size,
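free_low_memory_core_early() above is built on for_each_free_mem_range(), which visits every chunk of registered memory that is not covered by a reserved region. A stand-alone sketch that simply totals the free space the same way:

        #include <linux/memblock.h>

        static phys_addr_t __init example_total_free(void)
        {
                phys_addr_t start, end, total = 0;
                u64 i;

                for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
                        total += end - start;   /* memory && !reserved */

                return total;
        }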
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b8ba3aebf6e..bdc804c2d99c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,39 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
181static unsigned long __meminitdata nr_all_pages; 181static unsigned long __meminitdata nr_all_pages;
182static unsigned long __meminitdata dma_reserve; 182static unsigned long __meminitdata dma_reserve;
183 183
184#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 184#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
185 /* 185static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
186 * MAX_ACTIVE_REGIONS determines the maximum number of distinct 186static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
187 * ranges of memory (RAM) that may be registered with add_active_range(). 187static unsigned long __initdata required_kernelcore;
188 * Ranges passed to add_active_range() will be merged if possible 188static unsigned long __initdata required_movablecore;
189 * so the number of times add_active_range() can be called is 189static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
190 * related to the number of nodes and the number of holes 190
191 */ 191/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
192 #ifdef CONFIG_MAX_ACTIVE_REGIONS 192int movable_zone;
193 /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ 193EXPORT_SYMBOL(movable_zone);
194 #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS 194#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
195 #else
196 #if MAX_NUMNODES >= 32
197 /* If there can be many nodes, allow up to 50 holes per node */
198 #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
199 #else
200 /* By default, allow up to 256 distinct regions */
201 #define MAX_ACTIVE_REGIONS 256
202 #endif
203 #endif
204
205 static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
206 static int __meminitdata nr_nodemap_entries;
207 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
208 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
209 static unsigned long __initdata required_kernelcore;
210 static unsigned long __initdata required_movablecore;
211 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
212
213 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
214 int movable_zone;
215 EXPORT_SYMBOL(movable_zone);
216#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
217 195
218#if MAX_NUMNODES > 1 196#if MAX_NUMNODES > 1
219int nr_node_ids __read_mostly = MAX_NUMNODES; 197int nr_node_ids __read_mostly = MAX_NUMNODES;
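With CONFIG_HAVE_MEMBLOCK_NODE_MAP the per-node PFN ranges no longer live in a private early_node_map[]; the node id is attached to the memblock.memory regions themselves, which is why the declarations above shrink to just the zone and movable-zone bookkeeping. A rough sketch of how an architecture would now hand node information to the generic code (the helper name and range values are made up; it assumes the memblock_add()/memblock_set_node() interfaces introduced by this series):

#include <linux/memblock.h>

/* Illustrative only: register one bank of RAM and tag it with its node. */
static void __init sketch_register_bank(int nid, phys_addr_t base, phys_addr_t size)
{
	memblock_add(base, size);		/* replaces add_active_range()  */
	memblock_set_node(base, size, nid);	/* node id stored in the region */
}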
@@ -706,10 +684,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
706 int loop; 684 int loop;
707 685
708 prefetchw(page); 686 prefetchw(page);
709 for (loop = 0; loop < BITS_PER_LONG; loop++) { 687 for (loop = 0; loop < (1 << order); loop++) {
710 struct page *p = &page[loop]; 688 struct page *p = &page[loop];
711 689
712 if (loop + 1 < BITS_PER_LONG) 690 if (loop + 1 < (1 << order))
713 prefetchw(p + 1); 691 prefetchw(p + 1);
714 __ClearPageReserved(p); 692 __ClearPageReserved(p);
715 set_page_count(p, 0); 693 set_page_count(p, 0);
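The loop in __free_pages_bootmem() used to clear a fixed BITS_PER_LONG pages, which equals 1 << order only for order 5 on 32-bit or order 6 on 64-bit; after the hunk above it touches exactly the 1 << order pages of the block being freed, so callers may pass arbitrary orders. A stand-alone model of the corrected bound (not kernel code):

#include <stdio.h>

int main(void)
{
	/* pages covered by one buddy block of a given order */
	for (unsigned int order = 0; order <= 6; order++)
		printf("order %u frees %u pages\n", order, 1u << order);
	return 0;
}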
@@ -3737,35 +3715,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
3737 return 0; 3715 return 0;
3738} 3716}
3739 3717
3740#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 3718#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
3741/*
3742 * Basic iterator support. Return the first range of PFNs for a node
3743 * Note: nid == MAX_NUMNODES returns first region regardless of node
3744 */
3745static int __meminit first_active_region_index_in_nid(int nid)
3746{
3747 int i;
3748
3749 for (i = 0; i < nr_nodemap_entries; i++)
3750 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3751 return i;
3752
3753 return -1;
3754}
3755
3756/*
3757 * Basic iterator support. Return the next active range of PFNs for a node
3758 * Note: nid == MAX_NUMNODES returns next region regardless of node
3759 */
3760static int __meminit next_active_region_index_in_nid(int index, int nid)
3761{
3762 for (index = index + 1; index < nr_nodemap_entries; index++)
3763 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3764 return index;
3765
3766 return -1;
3767}
3768
3769#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 3719#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
3770/* 3720/*
3771 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 3721 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3775,15 +3725,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
3775 */ 3725 */
3776int __meminit __early_pfn_to_nid(unsigned long pfn) 3726int __meminit __early_pfn_to_nid(unsigned long pfn)
3777{ 3727{
3778 int i; 3728 unsigned long start_pfn, end_pfn;
3779 3729 int i, nid;
3780 for (i = 0; i < nr_nodemap_entries; i++) {
3781 unsigned long start_pfn = early_node_map[i].start_pfn;
3782 unsigned long end_pfn = early_node_map[i].end_pfn;
3783 3730
3731 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
3784 if (start_pfn <= pfn && pfn < end_pfn) 3732 if (start_pfn <= pfn && pfn < end_pfn)
3785 return early_node_map[i].nid; 3733 return nid;
3786 }
3787 /* This is a memory hole */ 3734 /* This is a memory hole */
3788 return -1; 3735 return -1;
3789} 3736}
@@ -3812,11 +3759,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3812} 3759}
3813#endif 3760#endif
3814 3761
3815/* Basic iterator support to walk early_node_map[] */
3816#define for_each_active_range_index_in_nid(i, nid) \
3817 for (i = first_active_region_index_in_nid(nid); i != -1; \
3818 i = next_active_region_index_in_nid(i, nid))
3819
3820/** 3762/**
3821 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range 3763 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
3822 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. 3764 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3826,122 +3768,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3826 * add_active_ranges() contain no holes and may be freed, this 3768 * add_active_ranges() contain no holes and may be freed, this
3827 * this function may be used instead of calling free_bootmem() manually. 3769 * this function may be used instead of calling free_bootmem() manually.
3828 */ 3770 */
3829void __init free_bootmem_with_active_regions(int nid, 3771void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
3830 unsigned long max_low_pfn)
3831{
3832 int i;
3833
3834 for_each_active_range_index_in_nid(i, nid) {
3835 unsigned long size_pages = 0;
3836 unsigned long end_pfn = early_node_map[i].end_pfn;
3837
3838 if (early_node_map[i].start_pfn >= max_low_pfn)
3839 continue;
3840
3841 if (end_pfn > max_low_pfn)
3842 end_pfn = max_low_pfn;
3843
3844 size_pages = end_pfn - early_node_map[i].start_pfn;
3845 free_bootmem_node(NODE_DATA(early_node_map[i].nid),
3846 PFN_PHYS(early_node_map[i].start_pfn),
3847 size_pages << PAGE_SHIFT);
3848 }
3849}
3850
3851#ifdef CONFIG_HAVE_MEMBLOCK
3852/*
3853 * Basic iterator support. Return the last range of PFNs for a node
3854 * Note: nid == MAX_NUMNODES returns last region regardless of node
3855 */
3856static int __meminit last_active_region_index_in_nid(int nid)
3857{ 3772{
3858 int i; 3773 unsigned long start_pfn, end_pfn;
3859 3774 int i, this_nid;
3860 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3861 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3862 return i;
3863
3864 return -1;
3865}
3866
3867/*
3868 * Basic iterator support. Return the previous active range of PFNs for a node
3869 * Note: nid == MAX_NUMNODES returns next region regardless of node
3870 */
3871static int __meminit previous_active_region_index_in_nid(int index, int nid)
3872{
3873 for (index = index - 1; index >= 0; index--)
3874 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3875 return index;
3876
3877 return -1;
3878}
3879
3880#define for_each_active_range_index_in_nid_reverse(i, nid) \
3881 for (i = last_active_region_index_in_nid(nid); i != -1; \
3882 i = previous_active_region_index_in_nid(i, nid))
3883
3884u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3885 u64 goal, u64 limit)
3886{
3887 int i;
3888
3889 /* Need to go over early_node_map to find out good range for node */
3890 for_each_active_range_index_in_nid_reverse(i, nid) {
3891 u64 addr;
3892 u64 ei_start, ei_last;
3893 u64 final_start, final_end;
3894
3895 ei_last = early_node_map[i].end_pfn;
3896 ei_last <<= PAGE_SHIFT;
3897 ei_start = early_node_map[i].start_pfn;
3898 ei_start <<= PAGE_SHIFT;
3899
3900 final_start = max(ei_start, goal);
3901 final_end = min(ei_last, limit);
3902
3903 if (final_start >= final_end)
3904 continue;
3905
3906 addr = memblock_find_in_range(final_start, final_end, size, align);
3907 3775
3908 if (addr == MEMBLOCK_ERROR) 3776 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
3909 continue; 3777 start_pfn = min(start_pfn, max_low_pfn);
3778 end_pfn = min(end_pfn, max_low_pfn);
3910 3779
3911 return addr; 3780 if (start_pfn < end_pfn)
3781 free_bootmem_node(NODE_DATA(this_nid),
3782 PFN_PHYS(start_pfn),
3783 (end_pfn - start_pfn) << PAGE_SHIFT);
3912 } 3784 }
3913
3914 return MEMBLOCK_ERROR;
3915} 3785}
3916#endif
3917 3786
3918int __init add_from_early_node_map(struct range *range, int az, 3787int __init add_from_early_node_map(struct range *range, int az,
3919 int nr_range, int nid) 3788 int nr_range, int nid)
3920{ 3789{
3790 unsigned long start_pfn, end_pfn;
3921 int i; 3791 int i;
3922 u64 start, end;
3923 3792
3924 /* need to go over early_node_map to find out good range for node */ 3793 /* need to go over early_node_map to find out good range for node */
3925 for_each_active_range_index_in_nid(i, nid) { 3794 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
3926 start = early_node_map[i].start_pfn; 3795 nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
3927 end = early_node_map[i].end_pfn;
3928 nr_range = add_range(range, az, nr_range, start, end);
3929 }
3930 return nr_range; 3796 return nr_range;
3931} 3797}
3932 3798
3933void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3934{
3935 int i;
3936 int ret;
3937
3938 for_each_active_range_index_in_nid(i, nid) {
3939 ret = work_fn(early_node_map[i].start_pfn,
3940 early_node_map[i].end_pfn, data);
3941 if (ret)
3942 break;
3943 }
3944}
3945/** 3799/**
3946 * sparse_memory_present_with_active_regions - Call memory_present for each active range 3800 * sparse_memory_present_with_active_regions - Call memory_present for each active range
3947 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 3801 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
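The hunk above rewrites free_bootmem_with_active_regions() to clamp each PFN range against max_low_pfn, and drops work_with_active_regions(), find_memory_core_early() and the early_node_map iterator macros entirely: callers can walk for_each_mem_pfn_range() themselves or allocate through memblock. A hedged sketch of what such a converted caller might look like (the function is hypothetical; the iterator is only available with CONFIG_HAVE_MEMBLOCK_NODE_MAP):

#include <linux/memblock.h>

/* Hypothetical caller: what used to be
 *	work_with_active_regions(nid, count_pages_fn, &count);
 * becomes an open-coded walk over the node's registered PFN ranges. */
static unsigned long __init sketch_count_present_pages(int nid)
{
	unsigned long start_pfn, end_pfn, count = 0;
	int i;

	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
		count += end_pfn - start_pfn;
	return count;
}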
@@ -3952,12 +3806,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3952 */ 3806 */
3953void __init sparse_memory_present_with_active_regions(int nid) 3807void __init sparse_memory_present_with_active_regions(int nid)
3954{ 3808{
3955 int i; 3809 unsigned long start_pfn, end_pfn;
3810 int i, this_nid;
3956 3811
3957 for_each_active_range_index_in_nid(i, nid) 3812 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
3958 memory_present(early_node_map[i].nid, 3813 memory_present(this_nid, start_pfn, end_pfn);
3959 early_node_map[i].start_pfn,
3960 early_node_map[i].end_pfn);
3961} 3814}
3962 3815
3963/** 3816/**
@@ -3974,13 +3827,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
3974void __meminit get_pfn_range_for_nid(unsigned int nid, 3827void __meminit get_pfn_range_for_nid(unsigned int nid,
3975 unsigned long *start_pfn, unsigned long *end_pfn) 3828 unsigned long *start_pfn, unsigned long *end_pfn)
3976{ 3829{
3830 unsigned long this_start_pfn, this_end_pfn;
3977 int i; 3831 int i;
3832
3978 *start_pfn = -1UL; 3833 *start_pfn = -1UL;
3979 *end_pfn = 0; 3834 *end_pfn = 0;
3980 3835
3981 for_each_active_range_index_in_nid(i, nid) { 3836 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
3982 *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); 3837 *start_pfn = min(*start_pfn, this_start_pfn);
3983 *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); 3838 *end_pfn = max(*end_pfn, this_end_pfn);
3984 } 3839 }
3985 3840
3986 if (*start_pfn == -1UL) 3841 if (*start_pfn == -1UL)
@@ -4083,46 +3938,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
4083 unsigned long range_start_pfn, 3938 unsigned long range_start_pfn,
4084 unsigned long range_end_pfn) 3939 unsigned long range_end_pfn)
4085{ 3940{
4086 int i = 0; 3941 unsigned long nr_absent = range_end_pfn - range_start_pfn;
4087 unsigned long prev_end_pfn = 0, hole_pages = 0; 3942 unsigned long start_pfn, end_pfn;
4088 unsigned long start_pfn; 3943 int i;
4089
4090 /* Find the end_pfn of the first active range of pfns in the node */
4091 i = first_active_region_index_in_nid(nid);
4092 if (i == -1)
4093 return 0;
4094
4095 prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4096
4097 /* Account for ranges before physical memory on this node */
4098 if (early_node_map[i].start_pfn > range_start_pfn)
4099 hole_pages = prev_end_pfn - range_start_pfn;
4100
4101 /* Find all holes for the zone within the node */
4102 for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
4103
4104 /* No need to continue if prev_end_pfn is outside the zone */
4105 if (prev_end_pfn >= range_end_pfn)
4106 break;
4107
4108 /* Make sure the end of the zone is not within the hole */
4109 start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4110 prev_end_pfn = max(prev_end_pfn, range_start_pfn);
4111 3944
4112 /* Update the hole size cound and move on */ 3945 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4113 if (start_pfn > range_start_pfn) { 3946 start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
4114 BUG_ON(prev_end_pfn > start_pfn); 3947 end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
4115 hole_pages += start_pfn - prev_end_pfn; 3948 nr_absent -= end_pfn - start_pfn;
4116 }
4117 prev_end_pfn = early_node_map[i].end_pfn;
4118 } 3949 }
4119 3950 return nr_absent;
4120 /* Account for ranges past physical memory on this node */
4121 if (range_end_pfn > prev_end_pfn)
4122 hole_pages += range_end_pfn -
4123 max(range_start_pfn, prev_end_pfn);
4124
4125 return hole_pages;
4126} 3951}
4127 3952
4128/** 3953/**
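The rewritten __absent_pages_in_range() starts from the assumption that the whole [range_start_pfn, range_end_pfn) window is absent and subtracts every piece of registered memory that falls inside it, clamping each memory range to the window. A stand-alone model with made-up numbers (two RAM ranges with a hole between them) shows the arithmetic:

#include <stdio.h>

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

int main(void)
{
	/* illustrative memory map: PFNs 0-99 and 200-299 are RAM */
	unsigned long mem[][2] = { { 0, 100 }, { 200, 300 } };
	unsigned long range_start = 50, range_end = 250;	/* query window */
	unsigned long nr_absent = range_end - range_start;

	for (unsigned int i = 0; i < 2; i++) {
		unsigned long start = clamp(mem[i][0], range_start, range_end);
		unsigned long end = clamp(mem[i][1], range_start, range_end);

		nr_absent -= end - start;
	}
	/* 200 PFNs queried, 100 backed by RAM -> 100 absent (the hole 100-199) */
	printf("absent pages: %lu\n", nr_absent);
	return 0;
}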
@@ -4143,14 +3968,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4143 unsigned long zone_type, 3968 unsigned long zone_type,
4144 unsigned long *ignored) 3969 unsigned long *ignored)
4145{ 3970{
3971 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
3972 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4146 unsigned long node_start_pfn, node_end_pfn; 3973 unsigned long node_start_pfn, node_end_pfn;
4147 unsigned long zone_start_pfn, zone_end_pfn; 3974 unsigned long zone_start_pfn, zone_end_pfn;
4148 3975
4149 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); 3976 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4150 zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], 3977 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4151 node_start_pfn); 3978 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4152 zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
4153 node_end_pfn);
4154 3979
4155 adjust_zone_range_for_zone_movable(nid, zone_type, 3980 adjust_zone_range_for_zone_movable(nid, zone_type,
4156 node_start_pfn, node_end_pfn, 3981 node_start_pfn, node_end_pfn,
@@ -4158,7 +3983,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4158 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 3983 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4159} 3984}
4160 3985
4161#else 3986#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4162static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, 3987static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4163 unsigned long zone_type, 3988 unsigned long zone_type,
4164 unsigned long *zones_size) 3989 unsigned long *zones_size)
@@ -4176,7 +4001,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4176 return zholes_size[zone_type]; 4001 return zholes_size[zone_type];
4177} 4002}
4178 4003
4179#endif 4004#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4180 4005
4181static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, 4006static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4182 unsigned long *zones_size, unsigned long *zholes_size) 4007 unsigned long *zones_size, unsigned long *zholes_size)
@@ -4399,10 +4224,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
4399 */ 4224 */
4400 if (pgdat == NODE_DATA(0)) { 4225 if (pgdat == NODE_DATA(0)) {
4401 mem_map = NODE_DATA(0)->node_mem_map; 4226 mem_map = NODE_DATA(0)->node_mem_map;
4402#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4227#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4403 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 4228 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
4404 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); 4229 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
4405#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4230#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4406 } 4231 }
4407#endif 4232#endif
4408#endif /* CONFIG_FLAT_NODE_MEM_MAP */ 4233#endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4427,7 +4252,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4427 free_area_init_core(pgdat, zones_size, zholes_size); 4252 free_area_init_core(pgdat, zones_size, zholes_size);
4428} 4253}
4429 4254
4430#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4255#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4431 4256
4432#if MAX_NUMNODES > 1 4257#if MAX_NUMNODES > 1
4433/* 4258/*
@@ -4449,170 +4274,6 @@ static inline void setup_nr_node_ids(void)
4449#endif 4274#endif
4450 4275
4451/** 4276/**
4452 * add_active_range - Register a range of PFNs backed by physical memory
4453 * @nid: The node ID the range resides on
4454 * @start_pfn: The start PFN of the available physical memory
4455 * @end_pfn: The end PFN of the available physical memory
4456 *
4457 * These ranges are stored in an early_node_map[] and later used by
4458 * free_area_init_nodes() to calculate zone sizes and holes. If the
4459 * range spans a memory hole, it is up to the architecture to ensure
4460 * the memory is not freed by the bootmem allocator. If possible
4461 * the range being registered will be merged with existing ranges.
4462 */
4463void __init add_active_range(unsigned int nid, unsigned long start_pfn,
4464 unsigned long end_pfn)
4465{
4466 int i;
4467
4468 mminit_dprintk(MMINIT_TRACE, "memory_register",
4469 "Entering add_active_range(%d, %#lx, %#lx) "
4470 "%d entries of %d used\n",
4471 nid, start_pfn, end_pfn,
4472 nr_nodemap_entries, MAX_ACTIVE_REGIONS);
4473
4474 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
4475
4476 /* Merge with existing active regions if possible */
4477 for (i = 0; i < nr_nodemap_entries; i++) {
4478 if (early_node_map[i].nid != nid)
4479 continue;
4480
4481 /* Skip if an existing region covers this new one */
4482 if (start_pfn >= early_node_map[i].start_pfn &&
4483 end_pfn <= early_node_map[i].end_pfn)
4484 return;
4485
4486 /* Merge forward if suitable */
4487 if (start_pfn <= early_node_map[i].end_pfn &&
4488 end_pfn > early_node_map[i].end_pfn) {
4489 early_node_map[i].end_pfn = end_pfn;
4490 return;
4491 }
4492
4493 /* Merge backward if suitable */
4494 if (start_pfn < early_node_map[i].start_pfn &&
4495 end_pfn >= early_node_map[i].start_pfn) {
4496 early_node_map[i].start_pfn = start_pfn;
4497 return;
4498 }
4499 }
4500
4501 /* Check that early_node_map is large enough */
4502 if (i >= MAX_ACTIVE_REGIONS) {
4503 printk(KERN_CRIT "More than %d memory regions, truncating\n",
4504 MAX_ACTIVE_REGIONS);
4505 return;
4506 }
4507
4508 early_node_map[i].nid = nid;
4509 early_node_map[i].start_pfn = start_pfn;
4510 early_node_map[i].end_pfn = end_pfn;
4511 nr_nodemap_entries = i + 1;
4512}
4513
4514/**
4515 * remove_active_range - Shrink an existing registered range of PFNs
4516 * @nid: The node id the range is on that should be shrunk
4517 * @start_pfn: The new PFN of the range
4518 * @end_pfn: The new PFN of the range
4519 *
4520 * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
4521 * The map is kept near the end physical page range that has already been
4522 * registered. This function allows an arch to shrink an existing registered
4523 * range.
4524 */
4525void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
4526 unsigned long end_pfn)
4527{
4528 int i, j;
4529 int removed = 0;
4530
4531 printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
4532 nid, start_pfn, end_pfn);
4533
4534 /* Find the old active region end and shrink */
4535 for_each_active_range_index_in_nid(i, nid) {
4536 if (early_node_map[i].start_pfn >= start_pfn &&
4537 early_node_map[i].end_pfn <= end_pfn) {
4538 /* clear it */
4539 early_node_map[i].start_pfn = 0;
4540 early_node_map[i].end_pfn = 0;
4541 removed = 1;
4542 continue;
4543 }
4544 if (early_node_map[i].start_pfn < start_pfn &&
4545 early_node_map[i].end_pfn > start_pfn) {
4546 unsigned long temp_end_pfn = early_node_map[i].end_pfn;
4547 early_node_map[i].end_pfn = start_pfn;
4548 if (temp_end_pfn > end_pfn)
4549 add_active_range(nid, end_pfn, temp_end_pfn);
4550 continue;
4551 }
4552 if (early_node_map[i].start_pfn >= start_pfn &&
4553 early_node_map[i].end_pfn > end_pfn &&
4554 early_node_map[i].start_pfn < end_pfn) {
4555 early_node_map[i].start_pfn = end_pfn;
4556 continue;
4557 }
4558 }
4559
4560 if (!removed)
4561 return;
4562
4563 /* remove the blank ones */
4564 for (i = nr_nodemap_entries - 1; i > 0; i--) {
4565 if (early_node_map[i].nid != nid)
4566 continue;
4567 if (early_node_map[i].end_pfn)
4568 continue;
4569 /* we found it, get rid of it */
4570 for (j = i; j < nr_nodemap_entries - 1; j++)
4571 memcpy(&early_node_map[j], &early_node_map[j+1],
4572 sizeof(early_node_map[j]));
4573 j = nr_nodemap_entries - 1;
4574 memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
4575 nr_nodemap_entries--;
4576 }
4577}
4578
4579/**
4580 * remove_all_active_ranges - Remove all currently registered regions
4581 *
4582 * During discovery, it may be found that a table like SRAT is invalid
4583 * and an alternative discovery method must be used. This function removes
4584 * all currently registered regions.
4585 */
4586void __init remove_all_active_ranges(void)
4587{
4588 memset(early_node_map, 0, sizeof(early_node_map));
4589 nr_nodemap_entries = 0;
4590}
4591
4592/* Compare two active node_active_regions */
4593static int __init cmp_node_active_region(const void *a, const void *b)
4594{
4595 struct node_active_region *arange = (struct node_active_region *)a;
4596 struct node_active_region *brange = (struct node_active_region *)b;
4597
4598 /* Done this way to avoid overflows */
4599 if (arange->start_pfn > brange->start_pfn)
4600 return 1;
4601 if (arange->start_pfn < brange->start_pfn)
4602 return -1;
4603
4604 return 0;
4605}
4606
4607/* sort the node_map by start_pfn */
4608void __init sort_node_map(void)
4609{
4610 sort(early_node_map, (size_t)nr_nodemap_entries,
4611 sizeof(struct node_active_region),
4612 cmp_node_active_region, NULL);
4613}
4614
4615/**
4616 * node_map_pfn_alignment - determine the maximum internode alignment 4277 * node_map_pfn_alignment - determine the maximum internode alignment
4617 * 4278 *
4618 * This function should be called after node map is populated and sorted. 4279 * This function should be called after node map is populated and sorted.
@@ -4634,15 +4295,11 @@ void __init sort_node_map(void)
4634unsigned long __init node_map_pfn_alignment(void) 4295unsigned long __init node_map_pfn_alignment(void)
4635{ 4296{
4636 unsigned long accl_mask = 0, last_end = 0; 4297 unsigned long accl_mask = 0, last_end = 0;
4298 unsigned long start, end, mask;
4637 int last_nid = -1; 4299 int last_nid = -1;
4638 int i; 4300 int i, nid;
4639
4640 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4641 int nid = early_node_map[i].nid;
4642 unsigned long start = early_node_map[i].start_pfn;
4643 unsigned long end = early_node_map[i].end_pfn;
4644 unsigned long mask;
4645 4301
4302 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4646 if (!start || last_nid < 0 || last_nid == nid) { 4303 if (!start || last_nid < 0 || last_nid == nid) {
4647 last_nid = nid; 4304 last_nid = nid;
4648 last_end = end; 4305 last_end = end;
@@ -4669,12 +4326,12 @@ unsigned long __init node_map_pfn_alignment(void)
4669/* Find the lowest pfn for a node */ 4326/* Find the lowest pfn for a node */
4670static unsigned long __init find_min_pfn_for_node(int nid) 4327static unsigned long __init find_min_pfn_for_node(int nid)
4671{ 4328{
4672 int i;
4673 unsigned long min_pfn = ULONG_MAX; 4329 unsigned long min_pfn = ULONG_MAX;
4330 unsigned long start_pfn;
4331 int i;
4674 4332
4675 /* Assuming a sorted map, the first range found has the starting pfn */ 4333 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4676 for_each_active_range_index_in_nid(i, nid) 4334 min_pfn = min(min_pfn, start_pfn);
4677 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
4678 4335
4679 if (min_pfn == ULONG_MAX) { 4336 if (min_pfn == ULONG_MAX) {
4680 printk(KERN_WARNING 4337 printk(KERN_WARNING
@@ -4703,15 +4360,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
4703 */ 4360 */
4704static unsigned long __init early_calculate_totalpages(void) 4361static unsigned long __init early_calculate_totalpages(void)
4705{ 4362{
4706 int i;
4707 unsigned long totalpages = 0; 4363 unsigned long totalpages = 0;
4364 unsigned long start_pfn, end_pfn;
4365 int i, nid;
4366
4367 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4368 unsigned long pages = end_pfn - start_pfn;
4708 4369
4709 for (i = 0; i < nr_nodemap_entries; i++) {
4710 unsigned long pages = early_node_map[i].end_pfn -
4711 early_node_map[i].start_pfn;
4712 totalpages += pages; 4370 totalpages += pages;
4713 if (pages) 4371 if (pages)
4714 node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); 4372 node_set_state(nid, N_HIGH_MEMORY);
4715 } 4373 }
4716 return totalpages; 4374 return totalpages;
4717} 4375}
@@ -4766,6 +4424,8 @@ restart:
4766 /* Spread kernelcore memory as evenly as possible throughout nodes */ 4424 /* Spread kernelcore memory as evenly as possible throughout nodes */
4767 kernelcore_node = required_kernelcore / usable_nodes; 4425 kernelcore_node = required_kernelcore / usable_nodes;
4768 for_each_node_state(nid, N_HIGH_MEMORY) { 4426 for_each_node_state(nid, N_HIGH_MEMORY) {
4427 unsigned long start_pfn, end_pfn;
4428
4769 /* 4429 /*
4770 * Recalculate kernelcore_node if the division per node 4430 * Recalculate kernelcore_node if the division per node
4771 * now exceeds what is necessary to satisfy the requested 4431 * now exceeds what is necessary to satisfy the requested
@@ -4782,13 +4442,10 @@ restart:
4782 kernelcore_remaining = kernelcore_node; 4442 kernelcore_remaining = kernelcore_node;
4783 4443
4784 /* Go through each range of PFNs within this node */ 4444 /* Go through each range of PFNs within this node */
4785 for_each_active_range_index_in_nid(i, nid) { 4445 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4786 unsigned long start_pfn, end_pfn;
4787 unsigned long size_pages; 4446 unsigned long size_pages;
4788 4447
4789 start_pfn = max(early_node_map[i].start_pfn, 4448 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4790 zone_movable_pfn[nid]);
4791 end_pfn = early_node_map[i].end_pfn;
4792 if (start_pfn >= end_pfn) 4449 if (start_pfn >= end_pfn)
4793 continue; 4450 continue;
4794 4451
@@ -4890,11 +4547,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4890 */ 4547 */
4891void __init free_area_init_nodes(unsigned long *max_zone_pfn) 4548void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4892{ 4549{
4893 unsigned long nid; 4550 unsigned long start_pfn, end_pfn;
4894 int i; 4551 int i, nid;
4895
4896 /* Sort early_node_map as initialisation assumes it is sorted */
4897 sort_node_map();
4898 4552
4899 /* Record where the zone boundaries are */ 4553 /* Record where the zone boundaries are */
4900 memset(arch_zone_lowest_possible_pfn, 0, 4554 memset(arch_zone_lowest_possible_pfn, 0,
@@ -4941,11 +4595,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4941 } 4595 }
4942 4596
4943 /* Print out the early_node_map[] */ 4597 /* Print out the early_node_map[] */
4944 printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); 4598 printk("Early memory PFN ranges\n");
4945 for (i = 0; i < nr_nodemap_entries; i++) 4599 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4946 printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, 4600 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
4947 early_node_map[i].start_pfn,
4948 early_node_map[i].end_pfn);
4949 4601
4950 /* Initialise every node */ 4602 /* Initialise every node */
4951 mminit_verify_pageflags_layout(); 4603 mminit_verify_pageflags_layout();
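The hunk above also drops the sort_node_map() call from free_area_init_nodes(): memblock inserts regions in base-address order (and merges adjacent ones), so the ranges yielded by for_each_mem_pfn_range() are already sorted and no separate sort pass is needed. A toy stand-alone model of ordered insertion, to illustrate the invariant rather than memblock's actual implementation (which also merges and resizes its arrays):

#include <stdio.h>

struct toy_region { unsigned long base, size; };

/* Toy model: keep the array sorted by base as regions are added. */
static void toy_add(struct toy_region *r, int *cnt, unsigned long base, unsigned long size)
{
	int i = *cnt;

	while (i > 0 && r[i - 1].base > base) {
		r[i] = r[i - 1];
		i--;
	}
	r[i].base = base;
	r[i].size = size;
	(*cnt)++;
}

int main(void)
{
	struct toy_region regions[4];
	int cnt = 0, i;

	toy_add(regions, &cnt, 0x8000, 0x1000);
	toy_add(regions, &cnt, 0x1000, 0x1000);
	toy_add(regions, &cnt, 0x4000, 0x1000);

	for (i = 0; i < cnt; i++)	/* prints the ranges in ascending order */
		printf("[%#lx, %#lx)\n", regions[i].base, regions[i].base + regions[i].size);
	return 0;
}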
@@ -4998,7 +4650,7 @@ static int __init cmdline_parse_movablecore(char *p)
4998early_param("kernelcore", cmdline_parse_kernelcore); 4650early_param("kernelcore", cmdline_parse_kernelcore);
4999early_param("movablecore", cmdline_parse_movablecore); 4651early_param("movablecore", cmdline_parse_movablecore);
5000 4652
5001#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4653#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
5002 4654
5003/** 4655/**
5004 * set_dma_reserve - set the specified number of pages reserved in the first zone 4656 * set_dma_reserve - set the specified number of pages reserved in the first zone