author     Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 10:54:53 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 10:54:53 -0500
commit     4a2164a7dbf0d3b6a1c2ef6f20c0d54350491a12 (patch)
tree       1ef38a6a3b39f7e539fff848975a5672acc21f44 /mm
parent     15f043a65f655eb8a3aeb831a85da66de520c80f (diff)
parent     45aa0663cc408617b79a2b53f0a5f50e94688a48 (diff)
Merge branch 'core-memblock-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-memblock-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
  memblock: Reimplement memblock allocation using reverse free area iterator
  memblock: Kill early_node_map[]
  score: Use HAVE_MEMBLOCK_NODE_MAP
  s390: Use HAVE_MEMBLOCK_NODE_MAP
  mips: Use HAVE_MEMBLOCK_NODE_MAP
  ia64: Use HAVE_MEMBLOCK_NODE_MAP
  SuperH: Use HAVE_MEMBLOCK_NODE_MAP
  sparc: Use HAVE_MEMBLOCK_NODE_MAP
  powerpc: Use HAVE_MEMBLOCK_NODE_MAP
  memblock: Implement memblock_add_node()
  memblock: s/memblock_analyze()/memblock_allow_resize()/ and update users
  memblock: Track total size of regions automatically
  powerpc: Cleanup memblock usage
  memblock: Reimplement memblock_enforce_memory_limit() using __memblock_remove()
  memblock: Make memblock functions handle overflowing range @size
  memblock: Reimplement __memblock_remove() using memblock_isolate_range()
  memblock: Separate out memblock_isolate_range() from memblock_set_node()
  memblock: Kill memblock_init()
  memblock: Kill sentinel entries at the end of static region arrays
  memblock: Add __memblock_dump_all()
  ...
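The series replaces memblock_init()/memblock_analyze() with compile-time initialization plus memblock_allow_resize(), makes regions node-aware, and retires the MEMBLOCK_ERROR sentinel in favour of 0 as the failure value. As orientation for the diff below, here is a minimal, illustrative sketch of how an architecture's early setup might drive the reworked API; it assumes kernel context, and the addresses and the setup_arch_memory_example() wrapper are invented for the example - only the memblock_* calls come from this series.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memblock.h>

void __init setup_arch_memory_example(void)
{
	phys_addr_t pgt;

	/* No memblock_init() any more: the static region arrays are set up at compile time. */
	memblock_add_node(0x00000000, 0x20000000, 0);	/* 512MB of RAM on node 0 */
	memblock_add_node(0x20000000, 0x20000000, 1);	/* 512MB of RAM on node 1 */

	memblock_reserve(0x00100000, 0x00800000);	/* e.g. the kernel image */

	memblock_set_current_limit(0x10000000);		/* keep early allocations below 256MB */
	memblock_allow_resize();			/* replaces memblock_analyze() */

	/* Node-aware, top-down early allocation backed by the new free-area iterator. */
	pgt = memblock_alloc_nid(PAGE_SIZE, PAGE_SIZE, 0);
	if (!pgt)					/* failure is now 0, not MEMBLOCK_ERROR */
		panic("early page table allocation failed");
}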
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig      |   6
-rw-r--r--  mm/memblock.c   | 961
-rw-r--r--  mm/nobootmem.c  |  45
-rw-r--r--  mm/page_alloc.c | 508
4 files changed, 640 insertions(+), 880 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c8..e338407f1225 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
131config HAVE_MEMBLOCK 131config HAVE_MEMBLOCK
132 boolean 132 boolean
133 133
134config HAVE_MEMBLOCK_NODE_MAP
135 boolean
136
137config ARCH_DISCARD_MEMBLOCK
138 boolean
139
134config NO_BOOTMEM 140config NO_BOOTMEM
135 boolean 141 boolean
136 142
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed5..2f55f19b7c86 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
20#include <linux/seq_file.h> 20#include <linux/seq_file.h>
21#include <linux/memblock.h> 21#include <linux/memblock.h>
22 22
23struct memblock memblock __initdata_memblock; 23static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
24static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
25
26struct memblock memblock __initdata_memblock = {
27 .memory.regions = memblock_memory_init_regions,
28 .memory.cnt = 1, /* empty dummy entry */
29 .memory.max = INIT_MEMBLOCK_REGIONS,
30
31 .reserved.regions = memblock_reserved_init_regions,
32 .reserved.cnt = 1, /* empty dummy entry */
33 .reserved.max = INIT_MEMBLOCK_REGIONS,
34
35 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
36};
24 37
25int memblock_debug __initdata_memblock; 38int memblock_debug __initdata_memblock;
26int memblock_can_resize __initdata_memblock; 39static int memblock_can_resize __initdata_memblock;
27static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
28static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
29 40
30/* inline so we don't get a warning when pr_debug is compiled out */ 41/* inline so we don't get a warning when pr_debug is compiled out */
31static inline const char *memblock_type_name(struct memblock_type *type) 42static inline const char *memblock_type_name(struct memblock_type *type)
@@ -38,20 +49,15 @@ static inline const char *memblock_type_name(struct memblock_type *type)
38 return "unknown"; 49 return "unknown";
39} 50}
40 51
41/* 52/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
42 * Address comparison utilities 53static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
43 */
44
45static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size)
46{
47 return addr & ~(size - 1);
48}
49
50static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size)
51{ 54{
52 return (addr + (size - 1)) & ~(size - 1); 55 return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
53} 56}
54 57
58/*
59 * Address comparison utilities
60 */
55static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, 61static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
56 phys_addr_t base2, phys_addr_t size2) 62 phys_addr_t base2, phys_addr_t size2)
57{ 63{
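The new memblock_cap_size() in the hunk above trims a caller-supplied size so that base + size cannot run past the end of the physical address space, which is what lets later callers pass ULLONG_MAX as a "to the end of memory" size. Below is a standalone userspace model of the same clamp; the typedef, the cap_size() name and the sample base are invented for illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

/* model of memblock_cap_size(): trim *size so base + *size ends at or below UINT64_MAX */
static phys_addr_t cap_size(phys_addr_t base, phys_addr_t *size)
{
	phys_addr_t max = UINT64_MAX - base;

	if (*size > max)
		*size = max;
	return *size;
}

int main(void)
{
	phys_addr_t size = UINT64_MAX;	/* e.g. memblock_remove(base, ULLONG_MAX) */

	cap_size(0xffff0000, &size);
	printf("capped size: %#llx\n", (unsigned long long)size);
	/* prints 0xffffffff0000ffff, so 0xffff0000 + size == UINT64_MAX exactly */
	return 0;
}

memblock_enforce_memory_limit() later in this diff relies on exactly this behaviour when it removes [max_addr, ULLONG_MAX) from both region arrays.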
@@ -73,83 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
73 return (i < type->cnt) ? i : -1; 79 return (i < type->cnt) ? i : -1;
74} 80}
75 81
76/* 82/**
77 * Find, allocate, deallocate or reserve unreserved regions. All allocations 83 * memblock_find_in_range_node - find free area in given range and node
78 * are top-down. 84 * @start: start of candidate range
85 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
86 * @size: size of free area to find
87 * @align: alignment of free area to find
88 * @nid: nid of the free area to find, %MAX_NUMNODES for any node
89 *
90 * Find @size free area aligned to @align in the specified range and node.
91 *
92 * RETURNS:
93 * Found address on success, %0 on failure.
79 */ 94 */
80 95phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
81static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, 96 phys_addr_t end, phys_addr_t size,
82 phys_addr_t size, phys_addr_t align) 97 phys_addr_t align, int nid)
83{ 98{
84 phys_addr_t base, res_base; 99 phys_addr_t this_start, this_end, cand;
85 long j; 100 u64 i;
86
87 /* In case, huge size is requested */
88 if (end < size)
89 return MEMBLOCK_ERROR;
90
91 base = memblock_align_down((end - size), align);
92 101
93 /* Prevent allocations returning 0 as it's also used to 102 /* align @size to avoid excessive fragmentation on reserved array */
94 * indicate an allocation failure 103 size = round_up(size, align);
95 */
96 if (start == 0)
97 start = PAGE_SIZE;
98
99 while (start <= base) {
100 j = memblock_overlaps_region(&memblock.reserved, base, size);
101 if (j < 0)
102 return base;
103 res_base = memblock.reserved.regions[j].base;
104 if (res_base < size)
105 break;
106 base = memblock_align_down(res_base - size, align);
107 }
108 104
109 return MEMBLOCK_ERROR; 105 /* pump up @end */
110}
111
112static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
113 phys_addr_t align, phys_addr_t start, phys_addr_t end)
114{
115 long i;
116
117 BUG_ON(0 == size);
118
119 /* Pump up max_addr */
120 if (end == MEMBLOCK_ALLOC_ACCESSIBLE) 106 if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
121 end = memblock.current_limit; 107 end = memblock.current_limit;
122 108
123 /* We do a top-down search, this tends to limit memory 109 /* adjust @start to avoid underflow and allocating the first page */
124 * fragmentation by keeping early boot allocs near the 110 start = max3(start, size, (phys_addr_t)PAGE_SIZE);
125 * top of memory 111 end = max(start, end);
126 */
127 for (i = memblock.memory.cnt - 1; i >= 0; i--) {
128 phys_addr_t memblockbase = memblock.memory.regions[i].base;
129 phys_addr_t memblocksize = memblock.memory.regions[i].size;
130 phys_addr_t bottom, top, found;
131 112
132 if (memblocksize < size) 113 for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
133 continue; 114 this_start = clamp(this_start, start, end);
134 if ((memblockbase + memblocksize) <= start) 115 this_end = clamp(this_end, start, end);
135 break; 116
136 bottom = max(memblockbase, start); 117 cand = round_down(this_end - size, align);
137 top = min(memblockbase + memblocksize, end); 118 if (cand >= this_start)
138 if (bottom >= top) 119 return cand;
139 continue;
140 found = memblock_find_region(bottom, top, size, align);
141 if (found != MEMBLOCK_ERROR)
142 return found;
143 } 120 }
144 return MEMBLOCK_ERROR; 121 return 0;
145} 122}
146 123
147/* 124/**
148 * Find a free area with specified alignment in a specific range. 125 * memblock_find_in_range - find free area in given range
126 * @start: start of candidate range
127 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
128 * @size: size of free area to find
129 * @align: alignment of free area to find
130 *
131 * Find @size free area aligned to @align in the specified range.
132 *
133 * RETURNS:
134 * Found address on success, %0 on failure.
149 */ 135 */
150u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) 136phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
137 phys_addr_t end, phys_addr_t size,
138 phys_addr_t align)
151{ 139{
152 return memblock_find_base(size, align, start, end); 140 return memblock_find_in_range_node(start, end, size, align,
141 MAX_NUMNODES);
153} 142}
154 143
155/* 144/*
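memblock_find_in_range_node() in the hunk above walks free areas top-down; within each area it clamps the area to the caller's window and rounds the end down to the requested alignment to get a candidate base. The standalone userspace model below shows that candidate calculation for one free area; the pick_top_down() helper, the macros and the numbers are invented, and the kernel additionally raises start to at least size and PAGE_SIZE beforehand so the subtraction cannot underflow.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

#define clamp(v, lo, hi)  ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))
#define round_down(x, a)  ((x) & ~((phys_addr_t)(a) - 1))

/* highest @align-aligned base inside the free area [free_start, free_end) that
 * still fits @size bytes and lies within the window [start, end); 0 = no fit */
static phys_addr_t pick_top_down(phys_addr_t free_start, phys_addr_t free_end,
				 phys_addr_t start, phys_addr_t end,
				 phys_addr_t size, phys_addr_t align)
{
	phys_addr_t this_start = clamp(free_start, start, end);
	phys_addr_t this_end = clamp(free_end, start, end);
	phys_addr_t cand;

	if (this_end < size)
		return 0;
	cand = round_down(this_end - size, align);
	return cand >= this_start ? cand : 0;
}

int main(void)
{
	/* free area [1MB, 16MB), request: 64KB, 64KB-aligned, below 8MB */
	phys_addr_t got = pick_top_down(0x100000, 0x1000000,
					0, 0x800000, 0x10000, 0x10000);

	printf("candidate: %#llx\n", (unsigned long long)got);	/* 0x7f0000 */
	return 0;
}

Returning 0 for "no fit" matches the new convention throughout the series now that the MEMBLOCK_ERROR sentinel is gone.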
@@ -178,25 +167,21 @@ int __init_memblock memblock_reserve_reserved_regions(void)
178 167
179static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 168static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
180{ 169{
181 unsigned long i; 170 type->total_size -= type->regions[r].size;
182 171 memmove(&type->regions[r], &type->regions[r + 1],
183 for (i = r; i < type->cnt - 1; i++) { 172 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
184 type->regions[i].base = type->regions[i + 1].base;
185 type->regions[i].size = type->regions[i + 1].size;
186 }
187 type->cnt--; 173 type->cnt--;
188 174
189 /* Special case for empty arrays */ 175 /* Special case for empty arrays */
190 if (type->cnt == 0) { 176 if (type->cnt == 0) {
177 WARN_ON(type->total_size != 0);
191 type->cnt = 1; 178 type->cnt = 1;
192 type->regions[0].base = 0; 179 type->regions[0].base = 0;
193 type->regions[0].size = 0; 180 type->regions[0].size = 0;
181 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
194 } 182 }
195} 183}
196 184
197/* Defined below but needed now */
198static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
199
200static int __init_memblock memblock_double_array(struct memblock_type *type) 185static int __init_memblock memblock_double_array(struct memblock_type *type)
201{ 186{
202 struct memblock_region *new_array, *old_array; 187 struct memblock_region *new_array, *old_array;
@@ -226,10 +211,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
226 */ 211 */
227 if (use_slab) { 212 if (use_slab) {
228 new_array = kmalloc(new_size, GFP_KERNEL); 213 new_array = kmalloc(new_size, GFP_KERNEL);
229 addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); 214 addr = new_array ? __pa(new_array) : 0;
230 } else 215 } else
231 addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); 216 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
232 if (addr == MEMBLOCK_ERROR) { 217 if (!addr) {
233 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 218 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
234 memblock_type_name(type), type->max, type->max * 2); 219 memblock_type_name(type), type->max, type->max * 2);
235 return -1; 220 return -1;
@@ -254,7 +239,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
254 return 0; 239 return 0;
255 240
256 /* Add the new reserved region now. Should not fail ! */ 241 /* Add the new reserved region now. Should not fail ! */
257 BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size)); 242 BUG_ON(memblock_reserve(addr, new_size));
258 243
259 /* If the array wasn't our static init one, then free it. We only do 244 /* If the array wasn't our static init one, then free it. We only do
260 * that before SLAB is available as later on, we don't know whether 245 * that before SLAB is available as later on, we don't know whether
@@ -268,343 +253,514 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
268 return 0; 253 return 0;
269} 254}
270 255
271int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, 256/**
272 phys_addr_t addr2, phys_addr_t size2) 257 * memblock_merge_regions - merge neighboring compatible regions
273{ 258 * @type: memblock type to scan
274 return 1; 259 *
275} 260 * Scan @type and merge neighboring compatible regions.
276 261 */
277static long __init_memblock memblock_add_region(struct memblock_type *type, 262static void __init_memblock memblock_merge_regions(struct memblock_type *type)
278 phys_addr_t base, phys_addr_t size)
279{ 263{
280 phys_addr_t end = base + size; 264 int i = 0;
281 int i, slot = -1;
282
283 /* First try and coalesce this MEMBLOCK with others */
284 for (i = 0; i < type->cnt; i++) {
285 struct memblock_region *rgn = &type->regions[i];
286 phys_addr_t rend = rgn->base + rgn->size;
287 265
288 /* Exit if there's no possible hits */ 266 /* cnt never goes below 1 */
289 if (rgn->base > end || rgn->size == 0) 267 while (i < type->cnt - 1) {
290 break; 268 struct memblock_region *this = &type->regions[i];
269 struct memblock_region *next = &type->regions[i + 1];
291 270
292 /* Check if we are fully enclosed within an existing 271 if (this->base + this->size != next->base ||
293 * block 272 memblock_get_region_node(this) !=
294 */ 273 memblock_get_region_node(next)) {
295 if (rgn->base <= base && rend >= end) 274 BUG_ON(this->base + this->size > next->base);
296 return 0; 275 i++;
276 continue;
277 }
297 278
298 /* Check if we overlap or are adjacent with the bottom 279 this->size += next->size;
299 * of a block. 280 memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next));
300 */ 281 type->cnt--;
301 if (base < rgn->base && end >= rgn->base) { 282 }
302 /* If we can't coalesce, create a new block */ 283}
303 if (!memblock_memory_can_coalesce(base, size,
304 rgn->base,
305 rgn->size)) {
306 /* Overlap & can't coalesce are mutually
307 * exclusive, if you do that, be prepared
308 * for trouble
309 */
310 WARN_ON(end != rgn->base);
311 goto new_block;
312 }
313 /* We extend the bottom of the block down to our
314 * base
315 */
316 rgn->base = base;
317 rgn->size = rend - base;
318 284
319 /* Return if we have nothing else to allocate 285/**
320 * (fully coalesced) 286 * memblock_insert_region - insert new memblock region
321 */ 287 * @type: memblock type to insert into
322 if (rend >= end) 288 * @idx: index for the insertion point
323 return 0; 289 * @base: base address of the new region
290 * @size: size of the new region
291 *
292 * Insert new memblock region [@base,@base+@size) into @type at @idx.
293 * @type must already have extra room to accomodate the new region.
294 */
295static void __init_memblock memblock_insert_region(struct memblock_type *type,
296 int idx, phys_addr_t base,
297 phys_addr_t size, int nid)
298{
299 struct memblock_region *rgn = &type->regions[idx];
324 300
325 /* We continue processing from the end of the 301 BUG_ON(type->cnt >= type->max);
326 * coalesced block. 302 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
327 */ 303 rgn->base = base;
328 base = rend; 304 rgn->size = size;
329 size = end - base; 305 memblock_set_region_node(rgn, nid);
330 } 306 type->cnt++;
307 type->total_size += size;
308}
331 309
332 /* Now check if we overlap or are adjacent with the 310/**
333 * top of a block 311 * memblock_add_region - add new memblock region
334 */ 312 * @type: memblock type to add new region into
335 if (base <= rend && end >= rend) { 313 * @base: base address of the new region
336 /* If we can't coalesce, create a new block */ 314 * @size: size of the new region
337 if (!memblock_memory_can_coalesce(rgn->base, 315 * @nid: nid of the new region
338 rgn->size, 316 *
339 base, size)) { 317 * Add new memblock region [@base,@base+@size) into @type. The new region
340 /* Overlap & can't coalesce are mutually 318 * is allowed to overlap with existing ones - overlaps don't affect already
341 * exclusive, if you do that, be prepared 319 * existing regions. @type is guaranteed to be minimal (all neighbouring
342 * for trouble 320 * compatible regions are merged) after the addition.
343 */ 321 *
344 WARN_ON(rend != base); 322 * RETURNS:
345 goto new_block; 323 * 0 on success, -errno on failure.
346 } 324 */
347 /* We adjust our base down to enclose the 325static int __init_memblock memblock_add_region(struct memblock_type *type,
348 * original block and destroy it. It will be 326 phys_addr_t base, phys_addr_t size, int nid)
349 * part of our new allocation. Since we've 327{
350 * freed an entry, we know we won't fail 328 bool insert = false;
351 * to allocate one later, so we won't risk 329 phys_addr_t obase = base;
352 * losing the original block allocation. 330 phys_addr_t end = base + memblock_cap_size(base, &size);
353 */ 331 int i, nr_new;
354 size += (base - rgn->base);
355 base = rgn->base;
356 memblock_remove_region(type, i--);
357 }
358 }
359 332
360 /* If the array is empty, special case, replace the fake 333 /* special case for empty array */
361 * filler region and return 334 if (type->regions[0].size == 0) {
362 */ 335 WARN_ON(type->cnt != 1 || type->total_size);
363 if ((type->cnt == 1) && (type->regions[0].size == 0)) {
364 type->regions[0].base = base; 336 type->regions[0].base = base;
365 type->regions[0].size = size; 337 type->regions[0].size = size;
338 memblock_set_region_node(&type->regions[0], nid);
339 type->total_size = size;
366 return 0; 340 return 0;
367 } 341 }
368 342repeat:
369 new_block: 343 /*
370 /* If we are out of space, we fail. It's too late to resize the array 344 * The following is executed twice. Once with %false @insert and
371 * but then this shouldn't have happened in the first place. 345 * then with %true. The first counts the number of regions needed
346 * to accomodate the new area. The second actually inserts them.
372 */ 347 */
373 if (WARN_ON(type->cnt >= type->max)) 348 base = obase;
374 return -1; 349 nr_new = 0;
375 350
376 /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ 351 for (i = 0; i < type->cnt; i++) {
377 for (i = type->cnt - 1; i >= 0; i--) { 352 struct memblock_region *rgn = &type->regions[i];
378 if (base < type->regions[i].base) { 353 phys_addr_t rbase = rgn->base;
379 type->regions[i+1].base = type->regions[i].base; 354 phys_addr_t rend = rbase + rgn->size;
380 type->regions[i+1].size = type->regions[i].size; 355
381 } else { 356 if (rbase >= end)
382 type->regions[i+1].base = base;
383 type->regions[i+1].size = size;
384 slot = i + 1;
385 break; 357 break;
358 if (rend <= base)
359 continue;
360 /*
361 * @rgn overlaps. If it separates the lower part of new
362 * area, insert that portion.
363 */
364 if (rbase > base) {
365 nr_new++;
366 if (insert)
367 memblock_insert_region(type, i++, base,
368 rbase - base, nid);
386 } 369 }
370 /* area below @rend is dealt with, forget about it */
371 base = min(rend, end);
387 } 372 }
388 if (base < type->regions[0].base) { 373
389 type->regions[0].base = base; 374 /* insert the remaining portion */
390 type->regions[0].size = size; 375 if (base < end) {
391 slot = 0; 376 nr_new++;
377 if (insert)
378 memblock_insert_region(type, i, base, end - base, nid);
392 } 379 }
393 type->cnt++;
394 380
395 /* The array is full ? Try to resize it. If that fails, we undo 381 /*
396 * our allocation and return an error 382 * If this was the first round, resize array and repeat for actual
383 * insertions; otherwise, merge and return.
397 */ 384 */
398 if (type->cnt == type->max && memblock_double_array(type)) { 385 if (!insert) {
399 BUG_ON(slot < 0); 386 while (type->cnt + nr_new > type->max)
400 memblock_remove_region(type, slot); 387 if (memblock_double_array(type) < 0)
401 return -1; 388 return -ENOMEM;
389 insert = true;
390 goto repeat;
391 } else {
392 memblock_merge_regions(type);
393 return 0;
402 } 394 }
403
404 return 0;
405} 395}
406 396
407long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 397int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
398 int nid)
408{ 399{
409 return memblock_add_region(&memblock.memory, base, size); 400 return memblock_add_region(&memblock.memory, base, size, nid);
401}
410 402
403int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
404{
405 return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
411} 406}
412 407
413static long __init_memblock __memblock_remove(struct memblock_type *type, 408/**
414 phys_addr_t base, phys_addr_t size) 409 * memblock_isolate_range - isolate given range into disjoint memblocks
410 * @type: memblock type to isolate range for
411 * @base: base of range to isolate
412 * @size: size of range to isolate
413 * @start_rgn: out parameter for the start of isolated region
414 * @end_rgn: out parameter for the end of isolated region
415 *
416 * Walk @type and ensure that regions don't cross the boundaries defined by
417 * [@base,@base+@size). Crossing regions are split at the boundaries,
418 * which may create at most two more regions. The index of the first
419 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
420 *
421 * RETURNS:
422 * 0 on success, -errno on failure.
423 */
424static int __init_memblock memblock_isolate_range(struct memblock_type *type,
425 phys_addr_t base, phys_addr_t size,
426 int *start_rgn, int *end_rgn)
415{ 427{
416 phys_addr_t end = base + size; 428 phys_addr_t end = base + memblock_cap_size(base, &size);
417 int i; 429 int i;
418 430
419 /* Walk through the array for collisions */ 431 *start_rgn = *end_rgn = 0;
432
433 /* we'll create at most two more regions */
434 while (type->cnt + 2 > type->max)
435 if (memblock_double_array(type) < 0)
436 return -ENOMEM;
437
420 for (i = 0; i < type->cnt; i++) { 438 for (i = 0; i < type->cnt; i++) {
421 struct memblock_region *rgn = &type->regions[i]; 439 struct memblock_region *rgn = &type->regions[i];
422 phys_addr_t rend = rgn->base + rgn->size; 440 phys_addr_t rbase = rgn->base;
441 phys_addr_t rend = rbase + rgn->size;
423 442
424 /* Nothing more to do, exit */ 443 if (rbase >= end)
425 if (rgn->base > end || rgn->size == 0)
426 break; 444 break;
427 445 if (rend <= base)
428 /* If we fully enclose the block, drop it */
429 if (base <= rgn->base && end >= rend) {
430 memblock_remove_region(type, i--);
431 continue; 446 continue;
432 }
433 447
434 /* If we are fully enclosed within a block 448 if (rbase < base) {
435 * then we need to split it and we are done 449 /*
436 */ 450 * @rgn intersects from below. Split and continue
437 if (base > rgn->base && end < rend) { 451 * to process the next region - the new top half.
438 rgn->size = base - rgn->base; 452 */
439 if (!memblock_add_region(type, end, rend - end)) 453 rgn->base = base;
440 return 0; 454 rgn->size -= base - rbase;
441 /* Failure to split is bad, we at least 455 type->total_size -= base - rbase;
442 * restore the block before erroring 456 memblock_insert_region(type, i, rbase, base - rbase,
457 memblock_get_region_node(rgn));
458 } else if (rend > end) {
459 /*
460 * @rgn intersects from above. Split and redo the
461 * current region - the new bottom half.
443 */ 462 */
444 rgn->size = rend - rgn->base;
445 WARN_ON(1);
446 return -1;
447 }
448
449 /* Check if we need to trim the bottom of a block */
450 if (rgn->base < end && rend > end) {
451 rgn->size -= end - rgn->base;
452 rgn->base = end; 463 rgn->base = end;
453 break; 464 rgn->size -= end - rbase;
465 type->total_size -= end - rbase;
466 memblock_insert_region(type, i--, rbase, end - rbase,
467 memblock_get_region_node(rgn));
468 } else {
469 /* @rgn is fully contained, record it */
470 if (!*end_rgn)
471 *start_rgn = i;
472 *end_rgn = i + 1;
454 } 473 }
474 }
455 475
456 /* And check if we need to trim the top of a block */ 476 return 0;
457 if (base < rend) 477}
458 rgn->size -= rend - base;
459 478
460 } 479static int __init_memblock __memblock_remove(struct memblock_type *type,
480 phys_addr_t base, phys_addr_t size)
481{
482 int start_rgn, end_rgn;
483 int i, ret;
484
485 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
486 if (ret)
487 return ret;
488
489 for (i = end_rgn - 1; i >= start_rgn; i--)
490 memblock_remove_region(type, i);
461 return 0; 491 return 0;
462} 492}
463 493
464long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) 494int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
465{ 495{
466 return __memblock_remove(&memblock.memory, base, size); 496 return __memblock_remove(&memblock.memory, base, size);
467} 497}
468 498
469long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) 499int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
470{ 500{
501 memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n",
502 (unsigned long long)base,
503 (unsigned long long)base + size,
504 (void *)_RET_IP_);
505
471 return __memblock_remove(&memblock.reserved, base, size); 506 return __memblock_remove(&memblock.reserved, base, size);
472} 507}
473 508
474long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) 509int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
475{ 510{
476 struct memblock_type *_rgn = &memblock.reserved; 511 struct memblock_type *_rgn = &memblock.reserved;
477 512
513 memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
514 (unsigned long long)base,
515 (unsigned long long)base + size,
516 (void *)_RET_IP_);
478 BUG_ON(0 == size); 517 BUG_ON(0 == size);
479 518
480 return memblock_add_region(_rgn, base, size); 519 return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
481} 520}
482 521
483phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 522/**
523 * __next_free_mem_range - next function for for_each_free_mem_range()
524 * @idx: pointer to u64 loop variable
525 * @nid: nid: node selector, %MAX_NUMNODES for all nodes
526 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
527 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
528 * @p_nid: ptr to int for nid of the range, can be %NULL
529 *
530 * Find the first free area from *@idx which matches @nid, fill the out
531 * parameters, and update *@idx for the next iteration. The lower 32bit of
532 * *@idx contains index into memory region and the upper 32bit indexes the
533 * areas before each reserved region. For example, if reserved regions
534 * look like the following,
535 *
536 * 0:[0-16), 1:[32-48), 2:[128-130)
537 *
538 * The upper 32bit indexes the following regions.
539 *
540 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
541 *
542 * As both region arrays are sorted, the function advances the two indices
543 * in lockstep and returns each intersection.
544 */
545void __init_memblock __next_free_mem_range(u64 *idx, int nid,
546 phys_addr_t *out_start,
547 phys_addr_t *out_end, int *out_nid)
484{ 548{
485 phys_addr_t found; 549 struct memblock_type *mem = &memblock.memory;
550 struct memblock_type *rsv = &memblock.reserved;
551 int mi = *idx & 0xffffffff;
552 int ri = *idx >> 32;
486 553
487 /* We align the size to limit fragmentation. Without this, a lot of 554 for ( ; mi < mem->cnt; mi++) {
488 * small allocs quickly eat up the whole reserve array on sparc 555 struct memblock_region *m = &mem->regions[mi];
489 */ 556 phys_addr_t m_start = m->base;
490 size = memblock_align_up(size, align); 557 phys_addr_t m_end = m->base + m->size;
491 558
492 found = memblock_find_base(size, align, 0, max_addr); 559 /* only memory regions are associated with nodes, check it */
493 if (found != MEMBLOCK_ERROR && 560 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
494 !memblock_add_region(&memblock.reserved, found, size)) 561 continue;
495 return found;
496 562
497 return 0; 563 /* scan areas before each reservation for intersection */
564 for ( ; ri < rsv->cnt + 1; ri++) {
565 struct memblock_region *r = &rsv->regions[ri];
566 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
567 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
568
569 /* if ri advanced past mi, break out to advance mi */
570 if (r_start >= m_end)
571 break;
572 /* if the two regions intersect, we're done */
573 if (m_start < r_end) {
574 if (out_start)
575 *out_start = max(m_start, r_start);
576 if (out_end)
577 *out_end = min(m_end, r_end);
578 if (out_nid)
579 *out_nid = memblock_get_region_node(m);
580 /*
581 * The region which ends first is advanced
582 * for the next iteration.
583 */
584 if (m_end <= r_end)
585 mi++;
586 else
587 ri++;
588 *idx = (u32)mi | (u64)ri << 32;
589 return;
590 }
591 }
592 }
593
594 /* signal end of iteration */
595 *idx = ULLONG_MAX;
498} 596}
499 597
500phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 598/**
599 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
600 * @idx: pointer to u64 loop variable
601 * @nid: nid: node selector, %MAX_NUMNODES for all nodes
602 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
603 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
604 * @p_nid: ptr to int for nid of the range, can be %NULL
605 *
606 * Reverse of __next_free_mem_range().
607 */
608void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
609 phys_addr_t *out_start,
610 phys_addr_t *out_end, int *out_nid)
501{ 611{
502 phys_addr_t alloc; 612 struct memblock_type *mem = &memblock.memory;
613 struct memblock_type *rsv = &memblock.reserved;
614 int mi = *idx & 0xffffffff;
615 int ri = *idx >> 32;
503 616
504 alloc = __memblock_alloc_base(size, align, max_addr); 617 if (*idx == (u64)ULLONG_MAX) {
618 mi = mem->cnt - 1;
619 ri = rsv->cnt;
620 }
505 621
506 if (alloc == 0) 622 for ( ; mi >= 0; mi--) {
507 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", 623 struct memblock_region *m = &mem->regions[mi];
508 (unsigned long long) size, (unsigned long long) max_addr); 624 phys_addr_t m_start = m->base;
625 phys_addr_t m_end = m->base + m->size;
509 626
510 return alloc; 627 /* only memory regions are associated with nodes, check it */
511} 628 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
629 continue;
512 630
513phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) 631 /* scan areas before each reservation for intersection */
514{ 632 for ( ; ri >= 0; ri--) {
515 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 633 struct memblock_region *r = &rsv->regions[ri];
516} 634 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
635 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
636
637 /* if ri advanced past mi, break out to advance mi */
638 if (r_end <= m_start)
639 break;
640 /* if the two regions intersect, we're done */
641 if (m_end > r_start) {
642 if (out_start)
643 *out_start = max(m_start, r_start);
644 if (out_end)
645 *out_end = min(m_end, r_end);
646 if (out_nid)
647 *out_nid = memblock_get_region_node(m);
648
649 if (m_start >= r_start)
650 mi--;
651 else
652 ri--;
653 *idx = (u32)mi | (u64)ri << 32;
654 return;
655 }
656 }
657 }
517 658
659 *idx = ULLONG_MAX;
660}
518 661
662#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
519/* 663/*
520 * Additional node-local allocators. Search for node memory is bottom up 664 * Common iterator interface used to define for_each_mem_range().
521 * and walks memblock regions within that node bottom-up as well, but allocation
522 * within an memblock region is top-down. XXX I plan to fix that at some stage
523 *
524 * WARNING: Only available after early_node_map[] has been populated,
525 * on some architectures, that is after all the calls to add_active_range()
526 * have been done to populate it.
527 */ 665 */
528 666void __init_memblock __next_mem_pfn_range(int *idx, int nid,
529phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) 667 unsigned long *out_start_pfn,
668 unsigned long *out_end_pfn, int *out_nid)
530{ 669{
531#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 670 struct memblock_type *type = &memblock.memory;
532 /* 671 struct memblock_region *r;
533 * This code originates from sparc which really wants use to walk by addresses
534 * and returns the nid. This is not very convenient for early_pfn_map[] users
535 * as the map isn't sorted yet, and it really wants to be walked by nid.
536 *
537 * For now, I implement the inefficient method below which walks the early
538 * map multiple times. Eventually we may want to use an ARCH config option
539 * to implement a completely different method for both case.
540 */
541 unsigned long start_pfn, end_pfn;
542 int i;
543 672
544 for (i = 0; i < MAX_NUMNODES; i++) { 673 while (++*idx < type->cnt) {
545 get_pfn_range_for_nid(i, &start_pfn, &end_pfn); 674 r = &type->regions[*idx];
546 if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) 675
676 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
547 continue; 677 continue;
548 *nid = i; 678 if (nid == MAX_NUMNODES || nid == r->nid)
549 return min(end, PFN_PHYS(end_pfn)); 679 break;
680 }
681 if (*idx >= type->cnt) {
682 *idx = -1;
683 return;
550 } 684 }
551#endif
552 *nid = 0;
553 685
554 return end; 686 if (out_start_pfn)
687 *out_start_pfn = PFN_UP(r->base);
688 if (out_end_pfn)
689 *out_end_pfn = PFN_DOWN(r->base + r->size);
690 if (out_nid)
691 *out_nid = r->nid;
555} 692}
556 693
557static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, 694/**
558 phys_addr_t size, 695 * memblock_set_node - set node ID on memblock regions
559 phys_addr_t align, int nid) 696 * @base: base of area to set node ID for
697 * @size: size of area to set node ID for
698 * @nid: node ID to set
699 *
700 * Set the nid of memblock memory regions in [@base,@base+@size) to @nid.
701 * Regions which cross the area boundaries are split as necessary.
702 *
703 * RETURNS:
704 * 0 on success, -errno on failure.
705 */
706int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
707 int nid)
560{ 708{
561 phys_addr_t start, end; 709 struct memblock_type *type = &memblock.memory;
710 int start_rgn, end_rgn;
711 int i, ret;
562 712
563 start = mp->base; 713 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
564 end = start + mp->size; 714 if (ret)
715 return ret;
565 716
566 start = memblock_align_up(start, align); 717 for (i = start_rgn; i < end_rgn; i++)
567 while (start < end) { 718 type->regions[i].nid = nid;
568 phys_addr_t this_end;
569 int this_nid;
570 719
571 this_end = memblock_nid_range(start, end, &this_nid); 720 memblock_merge_regions(type);
572 if (this_nid == nid) { 721 return 0;
573 phys_addr_t ret = memblock_find_region(start, this_end, size, align); 722}
574 if (ret != MEMBLOCK_ERROR && 723#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
575 !memblock_add_region(&memblock.reserved, ret, size)) 724
576 return ret; 725static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
577 } 726 phys_addr_t align, phys_addr_t max_addr,
578 start = this_end; 727 int nid)
579 } 728{
729 phys_addr_t found;
580 730
581 return MEMBLOCK_ERROR; 731 found = memblock_find_in_range_node(0, max_addr, size, align, nid);
732 if (found && !memblock_reserve(found, size))
733 return found;
734
735 return 0;
582} 736}
583 737
584phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) 738phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
585{ 739{
586 struct memblock_type *mem = &memblock.memory; 740 return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
587 int i; 741}
588 742
589 BUG_ON(0 == size); 743phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
744{
745 return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
746}
590 747
591 /* We align the size to limit fragmentation. Without this, a lot of 748phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
592 * small allocs quickly eat up the whole reserve array on sparc 749{
593 */ 750 phys_addr_t alloc;
594 size = memblock_align_up(size, align);
595 751
596 /* We do a bottom-up search for a region with the right 752 alloc = __memblock_alloc_base(size, align, max_addr);
597 * nid since that's easier considering how memblock_nid_range()
598 * works
599 */
600 for (i = 0; i < mem->cnt; i++) {
601 phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
602 size, align, nid);
603 if (ret != MEMBLOCK_ERROR)
604 return ret;
605 }
606 753
607 return 0; 754 if (alloc == 0)
755 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
756 (unsigned long long) size, (unsigned long long) max_addr);
757
758 return alloc;
759}
760
761phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
762{
763 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
608} 764}
609 765
610phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 766phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
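The __next_free_mem_range() comment in the hunk above defines a "free" range as the intersection of a memory region with one of the gaps before, between and after the reserved regions, packing the memory index into the low 32 bits of the u64 cursor and the gap index into the high 32 bits. The standalone userspace model below reproduces the comment's example reserved layout and prints the resulting free ranges; the struct and main() are invented for illustration.

#include <stdint.h>
#include <stdio.h>

struct region { uint64_t base, size; };

int main(void)
{
	/* reserved regions from the comment: 0:[0-16), 1:[32-48), 2:[128-130) */
	struct region rsv[] = { { 0, 16 }, { 32, 16 }, { 128, 2 } };
	struct region mem = { 0, 200 };		/* a single memory region [0-200) */
	int nr_rsv = 3, ri;

	for (ri = 0; ri <= nr_rsv; ri++) {
		/* gap before reservation ri: 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) */
		uint64_t r_start = ri ? rsv[ri - 1].base + rsv[ri - 1].size : 0;
		uint64_t r_end = ri < nr_rsv ? rsv[ri].base : UINT64_MAX;

		/* intersect the gap with the memory region, as the iterator does */
		uint64_t start = r_start > mem.base ? r_start : mem.base;
		uint64_t end = r_end < mem.base + mem.size ? r_end : mem.base + mem.size;

		if (start < end)
			printf("free: [%llu-%llu)\n",
			       (unsigned long long)start, (unsigned long long)end);
	}
	return 0;	/* prints [16-32), [48-128) and [130-200) */
}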
@@ -613,7 +769,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
613 769
614 if (res) 770 if (res)
615 return res; 771 return res;
616 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); 772 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
617} 773}
618 774
619 775
@@ -621,10 +777,9 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
621 * Remaining API functions 777 * Remaining API functions
622 */ 778 */
623 779
624/* You must call memblock_analyze() before this. */
625phys_addr_t __init memblock_phys_mem_size(void) 780phys_addr_t __init memblock_phys_mem_size(void)
626{ 781{
627 return memblock.memory_size; 782 return memblock.memory.total_size;
628} 783}
629 784
630/* lowest address */ 785/* lowest address */
@@ -640,45 +795,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
640 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 795 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
641} 796}
642 797
643/* You must call memblock_analyze() after this. */ 798void __init memblock_enforce_memory_limit(phys_addr_t limit)
644void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
645{ 799{
646 unsigned long i; 800 unsigned long i;
647 phys_addr_t limit; 801 phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
648 struct memblock_region *p;
649 802
650 if (!memory_limit) 803 if (!limit)
651 return; 804 return;
652 805
653 /* Truncate the memblock regions to satisfy the memory limit. */ 806 /* find out max address */
654 limit = memory_limit;
655 for (i = 0; i < memblock.memory.cnt; i++) { 807 for (i = 0; i < memblock.memory.cnt; i++) {
656 if (limit > memblock.memory.regions[i].size) { 808 struct memblock_region *r = &memblock.memory.regions[i];
657 limit -= memblock.memory.regions[i].size;
658 continue;
659 }
660
661 memblock.memory.regions[i].size = limit;
662 memblock.memory.cnt = i + 1;
663 break;
664 }
665
666 memory_limit = memblock_end_of_DRAM();
667 809
668 /* And truncate any reserves above the limit also. */ 810 if (limit <= r->size) {
669 for (i = 0; i < memblock.reserved.cnt; i++) { 811 max_addr = r->base + limit;
670 p = &memblock.reserved.regions[i]; 812 break;
671
672 if (p->base > memory_limit)
673 p->size = 0;
674 else if ((p->base + p->size) > memory_limit)
675 p->size = memory_limit - p->base;
676
677 if (p->size == 0) {
678 memblock_remove_region(&memblock.reserved, i);
679 i--;
680 } 813 }
814 limit -= r->size;
681 } 815 }
816
817 /* truncate both memory and reserved regions */
818 __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
819 __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
682} 820}
683 821
684static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 822static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
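The rewritten memblock_enforce_memory_limit() in the hunk above first walks .memory to find the physical address at which the limit is used up, then truncates both .memory and .reserved by removing [max_addr, ULLONG_MAX). Below is a standalone userspace model of the max_addr calculation; the two 512MB banks and the 768MB limit are invented sample values.

#include <stdint.h>
#include <stdio.h>

struct region { uint64_t base, size; };

int main(void)
{
	struct region mem[] = { { 0x00000000, 0x20000000 },	/* 512MB at 0 */
				{ 0x80000000, 0x20000000 } };	/* 512MB at 2GB */
	uint64_t limit = 0x30000000;				/* e.g. mem=768M */
	uint64_t max_addr = UINT64_MAX;
	int i;

	for (i = 0; i < 2; i++) {
		if (limit <= mem[i].size) {
			max_addr = mem[i].base + limit;
			break;
		}
		limit -= mem[i].size;
	}

	/* the kernel now calls __memblock_remove(type, max_addr, ULLONG_MAX) on both types */
	printf("truncate at %#llx\n", (unsigned long long)max_addr);
	/* prints 0x90000000: all of the first bank plus 256MB of the second */
	return 0;
}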
@@ -712,16 +850,18 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
712int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 850int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
713{ 851{
714 int idx = memblock_search(&memblock.memory, base); 852 int idx = memblock_search(&memblock.memory, base);
853 phys_addr_t end = base + memblock_cap_size(base, &size);
715 854
716 if (idx == -1) 855 if (idx == -1)
717 return 0; 856 return 0;
718 return memblock.memory.regions[idx].base <= base && 857 return memblock.memory.regions[idx].base <= base &&
719 (memblock.memory.regions[idx].base + 858 (memblock.memory.regions[idx].base +
720 memblock.memory.regions[idx].size) >= (base + size); 859 memblock.memory.regions[idx].size) >= end;
721} 860}
722 861
723int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 862int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
724{ 863{
864 memblock_cap_size(base, &size);
725 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; 865 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
726} 866}
727 867
@@ -731,86 +871,45 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
731 memblock.current_limit = limit; 871 memblock.current_limit = limit;
732} 872}
733 873
734static void __init_memblock memblock_dump(struct memblock_type *region, char *name) 874static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
735{ 875{
736 unsigned long long base, size; 876 unsigned long long base, size;
737 int i; 877 int i;
738 878
739 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); 879 pr_info(" %s.cnt = 0x%lx\n", name, type->cnt);
740 880
741 for (i = 0; i < region->cnt; i++) { 881 for (i = 0; i < type->cnt; i++) {
742 base = region->regions[i].base; 882 struct memblock_region *rgn = &type->regions[i];
743 size = region->regions[i].size; 883 char nid_buf[32] = "";
744 884
745 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", 885 base = rgn->base;
746 name, i, base, base + size - 1, size); 886 size = rgn->size;
887#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
888 if (memblock_get_region_node(rgn) != MAX_NUMNODES)
889 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
890 memblock_get_region_node(rgn));
891#endif
892 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n",
893 name, i, base, base + size - 1, size, nid_buf);
747 } 894 }
748} 895}
749 896
750void __init_memblock memblock_dump_all(void) 897void __init_memblock __memblock_dump_all(void)
751{ 898{
752 if (!memblock_debug)
753 return;
754
755 pr_info("MEMBLOCK configuration:\n"); 899 pr_info("MEMBLOCK configuration:\n");
756 pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); 900 pr_info(" memory size = %#llx reserved size = %#llx\n",
901 (unsigned long long)memblock.memory.total_size,
902 (unsigned long long)memblock.reserved.total_size);
757 903
758 memblock_dump(&memblock.memory, "memory"); 904 memblock_dump(&memblock.memory, "memory");
759 memblock_dump(&memblock.reserved, "reserved"); 905 memblock_dump(&memblock.reserved, "reserved");
760} 906}
761 907
762void __init memblock_analyze(void) 908void __init memblock_allow_resize(void)
763{ 909{
764 int i;
765
766 /* Check marker in the unused last array entry */
767 WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
768 != MEMBLOCK_INACTIVE);
769 WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
770 != MEMBLOCK_INACTIVE);
771
772 memblock.memory_size = 0;
773
774 for (i = 0; i < memblock.memory.cnt; i++)
775 memblock.memory_size += memblock.memory.regions[i].size;
776
777 /* We allow resizing from there */
778 memblock_can_resize = 1; 910 memblock_can_resize = 1;
779} 911}
780 912
781void __init memblock_init(void)
782{
783 static int init_done __initdata = 0;
784
785 if (init_done)
786 return;
787 init_done = 1;
788
789 /* Hookup the initial arrays */
790 memblock.memory.regions = memblock_memory_init_regions;
791 memblock.memory.max = INIT_MEMBLOCK_REGIONS;
792 memblock.reserved.regions = memblock_reserved_init_regions;
793 memblock.reserved.max = INIT_MEMBLOCK_REGIONS;
794
795 /* Write a marker in the unused last array entry */
796 memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
797 memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
798
799 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
800 * This simplifies the memblock_add() code below...
801 */
802 memblock.memory.regions[0].base = 0;
803 memblock.memory.regions[0].size = 0;
804 memblock.memory.cnt = 1;
805
806 /* Ditto. */
807 memblock.reserved.regions[0].base = 0;
808 memblock.reserved.regions[0].size = 0;
809 memblock.reserved.cnt = 1;
810
811 memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
812}
813
814static int __init early_memblock(char *p) 913static int __init early_memblock(char *p)
815{ 914{
816 if (p && strstr(p, "debug")) 915 if (p && strstr(p, "debug"))
@@ -819,7 +918,7 @@ static int __init early_memblock(char *p)
819} 918}
820early_param("memblock", early_memblock); 919early_param("memblock", early_memblock);
821 920
822#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) 921#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
823 922
824static int memblock_debug_show(struct seq_file *m, void *private) 923static int memblock_debug_show(struct seq_file *m, void *private)
825{ 924{
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7fa41b4a07bf..24f0fc1a56d6 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -41,14 +41,13 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
41 if (limit > memblock.current_limit) 41 if (limit > memblock.current_limit)
42 limit = memblock.current_limit; 42 limit = memblock.current_limit;
43 43
44 addr = find_memory_core_early(nid, size, align, goal, limit); 44 addr = memblock_find_in_range_node(goal, limit, size, align, nid);
45 45 if (!addr)
46 if (addr == MEMBLOCK_ERROR)
47 return NULL; 46 return NULL;
48 47
49 ptr = phys_to_virt(addr); 48 ptr = phys_to_virt(addr);
50 memset(ptr, 0, size); 49 memset(ptr, 0, size);
51 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM"); 50 memblock_reserve(addr, size);
52 /* 51 /*
53 * The min_count is set to 0 so that bootmem allocated blocks 52 * The min_count is set to 0 so that bootmem allocated blocks
54 * are never reported as leaks. 53 * are never reported as leaks.
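With MEMBLOCK_ERROR gone and memblock_reserve()/memblock_free() usable from generic code, the hunk above drops the x86-only find_memory_core_early()/memblock_x86_reserve_range() helpers in favour of a plain find-then-reserve sequence. The sketch below restates that idiom as a hypothetical helper; early_zalloc_node() is an invented name and kernel context is assumed.

#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/string.h>
#include <asm/io.h>

static void * __init early_zalloc_node(int nid, phys_addr_t size,
				       phys_addr_t align, phys_addr_t limit)
{
	phys_addr_t addr;
	void *ptr;

	addr = memblock_find_in_range_node(0, limit, size, align, nid);
	if (!addr)				/* failure is now 0, not MEMBLOCK_ERROR */
		return NULL;

	memblock_reserve(addr, size);		/* claim the range before anyone else can */
	ptr = phys_to_virt(addr);
	memset(ptr, 0, size);
	return ptr;
}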
@@ -107,23 +106,27 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
107 __free_pages_bootmem(pfn_to_page(i), 0); 106 __free_pages_bootmem(pfn_to_page(i), 0);
108} 107}
109 108
110unsigned long __init free_all_memory_core_early(int nodeid) 109unsigned long __init free_low_memory_core_early(int nodeid)
111{ 110{
112 int i;
113 u64 start, end;
114 unsigned long count = 0; 111 unsigned long count = 0;
115 struct range *range = NULL; 112 phys_addr_t start, end;
116 int nr_range; 113 u64 i;
117 114
118 nr_range = get_free_all_memory_range(&range, nodeid); 115 /* free reserved array temporarily so that it's treated as free area */
119 116 memblock_free_reserved_regions();
120 for (i = 0; i < nr_range; i++) { 117
121 start = range[i].start; 118 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
122 end = range[i].end; 119 unsigned long start_pfn = PFN_UP(start);
123 count += end - start; 120 unsigned long end_pfn = min_t(unsigned long,
124 __free_pages_memory(start, end); 121 PFN_DOWN(end), max_low_pfn);
122 if (start_pfn < end_pfn) {
123 __free_pages_memory(start_pfn, end_pfn);
124 count += end_pfn - start_pfn;
125 }
125 } 126 }
126 127
128 /* put region array back? */
129 memblock_reserve_reserved_regions();
127 return count; 130 return count;
128} 131}
129 132
@@ -137,7 +140,7 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
137{ 140{
138 register_page_bootmem_info_node(pgdat); 141 register_page_bootmem_info_node(pgdat);
139 142
140 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ 143 /* free_low_memory_core_early(MAX_NUMNODES) will be called later */
141 return 0; 144 return 0;
142} 145}
143 146
@@ -155,7 +158,7 @@ unsigned long __init free_all_bootmem(void)
155 * Use MAX_NUMNODES will make sure all ranges in early_node_map[] 158 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
156 * will be used instead of only Node0 related 159 * will be used instead of only Node0 related
157 */ 160 */
158 return free_all_memory_core_early(MAX_NUMNODES); 161 return free_low_memory_core_early(MAX_NUMNODES);
159} 162}
160 163
161/** 164/**
@@ -172,7 +175,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
172 unsigned long size) 175 unsigned long size)
173{ 176{
174 kmemleak_free_part(__va(physaddr), size); 177 kmemleak_free_part(__va(physaddr), size);
175 memblock_x86_free_range(physaddr, physaddr + size); 178 memblock_free(physaddr, size);
176} 179}
177 180
178/** 181/**
@@ -187,7 +190,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
187void __init free_bootmem(unsigned long addr, unsigned long size) 190void __init free_bootmem(unsigned long addr, unsigned long size)
188{ 191{
189 kmemleak_free_part(__va(addr), size); 192 kmemleak_free_part(__va(addr), size);
190 memblock_x86_free_range(addr, addr + size); 193 memblock_free(addr, size);
191} 194}
192 195
193static void * __init ___alloc_bootmem_nopanic(unsigned long size, 196static void * __init ___alloc_bootmem_nopanic(unsigned long size,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b8ba3aebf6e..bdc804c2d99c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,39 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
181static unsigned long __meminitdata nr_all_pages; 181static unsigned long __meminitdata nr_all_pages;
182static unsigned long __meminitdata dma_reserve; 182static unsigned long __meminitdata dma_reserve;
183 183
184#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 184#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
185 /* 185static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
186 * MAX_ACTIVE_REGIONS determines the maximum number of distinct 186static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
187 * ranges of memory (RAM) that may be registered with add_active_range(). 187static unsigned long __initdata required_kernelcore;
188 * Ranges passed to add_active_range() will be merged if possible 188static unsigned long __initdata required_movablecore;
189 * so the number of times add_active_range() can be called is 189static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
190 * related to the number of nodes and the number of holes 190
191 */ 191/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
192 #ifdef CONFIG_MAX_ACTIVE_REGIONS 192int movable_zone;
193 /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ 193EXPORT_SYMBOL(movable_zone);
194 #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS 194#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
195 #else
196 #if MAX_NUMNODES >= 32
197 /* If there can be many nodes, allow up to 50 holes per node */
198 #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
199 #else
200 /* By default, allow up to 256 distinct regions */
201 #define MAX_ACTIVE_REGIONS 256
202 #endif
203 #endif
204
205 static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
206 static int __meminitdata nr_nodemap_entries;
207 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
208 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
209 static unsigned long __initdata required_kernelcore;
210 static unsigned long __initdata required_movablecore;
211 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
212
213 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
214 int movable_zone;
215 EXPORT_SYMBOL(movable_zone);
216#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
217 195
218#if MAX_NUMNODES > 1 196#if MAX_NUMNODES > 1
219int nr_node_ids __read_mostly = MAX_NUMNODES; 197int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -706,10 +684,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
706 int loop; 684 int loop;
707 685
708 prefetchw(page); 686 prefetchw(page);
709 for (loop = 0; loop < BITS_PER_LONG; loop++) { 687 for (loop = 0; loop < (1 << order); loop++) {
710 struct page *p = &page[loop]; 688 struct page *p = &page[loop];
711 689
712 if (loop + 1 < BITS_PER_LONG) 690 if (loop + 1 < (1 << order))
713 prefetchw(p + 1); 691 prefetchw(p + 1);
714 __ClearPageReserved(p); 692 __ClearPageReserved(p);
715 set_page_count(p, 0); 693 set_page_count(p, 0);
@@ -3737,35 +3715,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
3737 return 0; 3715 return 0;
3738} 3716}
3739 3717
3740#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 3718#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
3741/*
3742 * Basic iterator support. Return the first range of PFNs for a node
3743 * Note: nid == MAX_NUMNODES returns first region regardless of node
3744 */
3745static int __meminit first_active_region_index_in_nid(int nid)
3746{
3747 int i;
3748
3749 for (i = 0; i < nr_nodemap_entries; i++)
3750 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3751 return i;
3752
3753 return -1;
3754}
3755
3756/*
3757 * Basic iterator support. Return the next active range of PFNs for a node
3758 * Note: nid == MAX_NUMNODES returns next region regardless of node
3759 */
3760static int __meminit next_active_region_index_in_nid(int index, int nid)
3761{
3762 for (index = index + 1; index < nr_nodemap_entries; index++)
3763 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3764 return index;
3765
3766 return -1;
3767}
3768
3769#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 3719#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
3770/* 3720/*
3771 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 3721 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3775,15 +3725,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
3775 */ 3725 */
3776int __meminit __early_pfn_to_nid(unsigned long pfn) 3726int __meminit __early_pfn_to_nid(unsigned long pfn)
3777{ 3727{
3778 int i; 3728 unsigned long start_pfn, end_pfn;
3779 3729 int i, nid;
3780 for (i = 0; i < nr_nodemap_entries; i++) {
3781 unsigned long start_pfn = early_node_map[i].start_pfn;
3782 unsigned long end_pfn = early_node_map[i].end_pfn;
3783 3730
3731 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
3784 if (start_pfn <= pfn && pfn < end_pfn) 3732 if (start_pfn <= pfn && pfn < end_pfn)
3785 return early_node_map[i].nid; 3733 return nid;
3786 }
3787 /* This is a memory hole */ 3734 /* This is a memory hole */
3788 return -1; 3735 return -1;
3789} 3736}
@@ -3812,11 +3759,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3812} 3759}
3813#endif 3760#endif
3814 3761
3815/* Basic iterator support to walk early_node_map[] */
3816#define for_each_active_range_index_in_nid(i, nid) \
3817 for (i = first_active_region_index_in_nid(nid); i != -1; \
3818 i = next_active_region_index_in_nid(i, nid))
3819
3820/** 3762/**
3821 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range 3763 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
3822 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. 3764 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3826,122 +3768,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3826 * add_active_ranges() contain no holes and may be freed, this 3768 * add_active_ranges() contain no holes and may be freed, this
3827 * this function may be used instead of calling free_bootmem() manually. 3769 * this function may be used instead of calling free_bootmem() manually.
3828 */ 3770 */
3829void __init free_bootmem_with_active_regions(int nid, 3771void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
3830 unsigned long max_low_pfn)
3831{
3832 int i;
3833
3834 for_each_active_range_index_in_nid(i, nid) {
3835 unsigned long size_pages = 0;
3836 unsigned long end_pfn = early_node_map[i].end_pfn;
3837
3838 if (early_node_map[i].start_pfn >= max_low_pfn)
3839 continue;
3840
3841 if (end_pfn > max_low_pfn)
3842 end_pfn = max_low_pfn;
3843
3844 size_pages = end_pfn - early_node_map[i].start_pfn;
3845 free_bootmem_node(NODE_DATA(early_node_map[i].nid),
3846 PFN_PHYS(early_node_map[i].start_pfn),
3847 size_pages << PAGE_SHIFT);
3848 }
3849}
3850
3851#ifdef CONFIG_HAVE_MEMBLOCK
3852/*
3853 * Basic iterator support. Return the last range of PFNs for a node
3854 * Note: nid == MAX_NUMNODES returns last region regardless of node
3855 */
3856static int __meminit last_active_region_index_in_nid(int nid)
3857{ 3772{
3858 int i; 3773 unsigned long start_pfn, end_pfn;
3859 3774 int i, this_nid;
3860 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3861 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3862 return i;
3863
3864 return -1;
3865}
3866
3867/*
3868 * Basic iterator support. Return the previous active range of PFNs for a node
3869 * Note: nid == MAX_NUMNODES returns previous region regardless of node
3870 */
3871static int __meminit previous_active_region_index_in_nid(int index, int nid)
3872{
3873 for (index = index - 1; index >= 0; index--)
3874 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3875 return index;
3876
3877 return -1;
3878}
3879
3880#define for_each_active_range_index_in_nid_reverse(i, nid) \
3881 for (i = last_active_region_index_in_nid(nid); i != -1; \
3882 i = previous_active_region_index_in_nid(i, nid))
3883
3884u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3885 u64 goal, u64 limit)
3886{
3887 int i;
3888
3889 /* Need to go over early_node_map to find out good range for node */
3890 for_each_active_range_index_in_nid_reverse(i, nid) {
3891 u64 addr;
3892 u64 ei_start, ei_last;
3893 u64 final_start, final_end;
3894
3895 ei_last = early_node_map[i].end_pfn;
3896 ei_last <<= PAGE_SHIFT;
3897 ei_start = early_node_map[i].start_pfn;
3898 ei_start <<= PAGE_SHIFT;
3899
3900 final_start = max(ei_start, goal);
3901 final_end = min(ei_last, limit);
3902
3903 if (final_start >= final_end)
3904 continue;
3905
3906 addr = memblock_find_in_range(final_start, final_end, size, align);
3907 3775
3908 if (addr == MEMBLOCK_ERROR) 3776 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
3909 continue; 3777 start_pfn = min(start_pfn, max_low_pfn);
3778 end_pfn = min(end_pfn, max_low_pfn);
3910 3779
3911 return addr; 3780 if (start_pfn < end_pfn)
3781 free_bootmem_node(NODE_DATA(this_nid),
3782 PFN_PHYS(start_pfn),
3783 (end_pfn - start_pfn) << PAGE_SHIFT);
3912 } 3784 }
3913
3914 return MEMBLOCK_ERROR;
3915} 3785}
3916#endif
3917 3786
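
The rewritten free_bootmem_with_active_regions() above no longer skips or trims ranges by hand: it clamps each range's start and end to max_low_pfn and frees whatever remains. A small userspace C sketch of that clamp-and-free step, with fake_free() standing in for free_bootmem_node() and a hard-coded PAGE_SHIFT purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12	/* illustrative; the real value is per-architecture */

struct pfn_range { unsigned long start_pfn, end_pfn; int nid; };

static const struct pfn_range ranges[] = {
	{ 0x000, 0x400, 0 }, { 0x400, 0xa00, 1 },
};
static const int nr_ranges = sizeof(ranges) / sizeof(ranges[0]);

/* Stand-in for free_bootmem_node(): just report what would be freed. */
static void fake_free(int nid, unsigned long phys, unsigned long bytes)
{
	printf("node %d: free %#lx + %#lx\n", nid, phys, bytes);
}

/* Clamp every range to max_low_pfn, then free whatever is left of it. */
static void free_low_ranges(unsigned long max_low_pfn)
{
	int i;

	for (i = 0; i < nr_ranges; i++) {
		unsigned long start = ranges[i].start_pfn;
		unsigned long end = ranges[i].end_pfn;

		if (start > max_low_pfn)
			start = max_low_pfn;
		if (end > max_low_pfn)
			end = max_low_pfn;
		if (start < end)
			fake_free(ranges[i].nid, start << PAGE_SHIFT,
				  (end - start) << PAGE_SHIFT);
	}
}

int main(void)
{
	free_low_ranges(0x800);	/* anything above PFN 0x800 is skipped */
	return 0;
}
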
3918int __init add_from_early_node_map(struct range *range, int az, 3787int __init add_from_early_node_map(struct range *range, int az,
3919 int nr_range, int nid) 3788 int nr_range, int nid)
3920{ 3789{
3790 unsigned long start_pfn, end_pfn;
3921 int i; 3791 int i;
3922 u64 start, end;
3923 3792
3924 /* need to go over early_node_map to find out good range for node */ 3793 /* need to go over early_node_map to find out good range for node */
3925 for_each_active_range_index_in_nid(i, nid) { 3794 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
3926 start = early_node_map[i].start_pfn; 3795 nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
3927 end = early_node_map[i].end_pfn;
3928 nr_range = add_range(range, az, nr_range, start, end);
3929 }
3930 return nr_range; 3796 return nr_range;
3931} 3797}
3932 3798
3933void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3934{
3935 int i;
3936 int ret;
3937
3938 for_each_active_range_index_in_nid(i, nid) {
3939 ret = work_fn(early_node_map[i].start_pfn,
3940 early_node_map[i].end_pfn, data);
3941 if (ret)
3942 break;
3943 }
3944}
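
work_with_active_regions(), removed above, walked the ranges through a caller-supplied work_fn callback and stopped on the first non-zero return; with a generic for_each-style iterator available, callers can open-code that loop instead. A userspace sketch of the callback style for comparison, using illustrative names (walk_ranges(), work_fn_t, count_pages()):

#include <stdio.h>

struct pfn_range { unsigned long start_pfn, end_pfn; };

static const struct pfn_range ranges[] = { { 0x000, 0x100 }, { 0x200, 0x300 } };
static const int nr_ranges = sizeof(ranges) / sizeof(ranges[0]);

typedef int (*work_fn_t)(unsigned long start, unsigned long end, void *data);

/* Callback-style walk: stop as soon as the callback returns non-zero. */
static void walk_ranges(work_fn_t work_fn, void *data)
{
	int i;

	for (i = 0; i < nr_ranges; i++)
		if (work_fn(ranges[i].start_pfn, ranges[i].end_pfn, data))
			break;
}

static int count_pages(unsigned long start, unsigned long end, void *data)
{
	*(unsigned long *)data += end - start;
	return 0;	/* keep going */
}

int main(void)
{
	unsigned long total = 0;

	walk_ranges(count_pages, &total);
	printf("total pages: %#lx\n", total);	/* 0x200 */
	return 0;
}
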
3945/** 3799/**
3946 * sparse_memory_present_with_active_regions - Call memory_present for each active range 3800 * sparse_memory_present_with_active_regions - Call memory_present for each active range
3947 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 3801 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -3952,12 +3806,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3952 */ 3806 */
3953void __init sparse_memory_present_with_active_regions(int nid) 3807void __init sparse_memory_present_with_active_regions(int nid)
3954{ 3808{
3955 int i; 3809 unsigned long start_pfn, end_pfn;
3810 int i, this_nid;
3956 3811
3957 for_each_active_range_index_in_nid(i, nid) 3812 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
3958 memory_present(early_node_map[i].nid, 3813 memory_present(this_nid, start_pfn, end_pfn);
3959 early_node_map[i].start_pfn,
3960 early_node_map[i].end_pfn);
3961} 3814}
3962 3815
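
The converted call sites above show the two ways the new iterator is used: sparse_memory_present_with_active_regions() asks for the node id (&this_nid), while add_from_early_node_map() passes NULL for an output it does not need. A tiny userspace sketch of that optional-output convention; get_range() and ranges[] are illustrative only:

#include <stdio.h>
#include <stddef.h>

struct pfn_range { unsigned long start_pfn, end_pfn; int nid; };

static const struct pfn_range ranges[] = {
	{ 0x000, 0x100, 0 }, { 0x200, 0x300, 1 },
};
static const int nr_ranges = sizeof(ranges) / sizeof(ranges[0]);

/*
 * Copy out the i-th range; any output pointer may be NULL when the
 * caller does not care about that value.
 */
static void get_range(int i, unsigned long *start, unsigned long *end, int *nid)
{
	if (start)
		*start = ranges[i].start_pfn;
	if (end)
		*end = ranges[i].end_pfn;
	if (nid)
		*nid = ranges[i].nid;
}

int main(void)
{
	unsigned long start, end;
	int i, nid;

	for (i = 0; i < nr_ranges; i++) {
		get_range(i, &start, &end, &nid);	/* want the node too */
		printf("node %d: [%#lx, %#lx)\n", nid, start, end);
	}
	for (i = 0; i < nr_ranges; i++) {
		get_range(i, &start, &end, NULL);	/* node id not needed */
		printf("[%#lx, %#lx)\n", start, end);
	}
	return 0;
}
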
3963/** 3816/**
@@ -3974,13 +3827,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
3974void __meminit get_pfn_range_for_nid(unsigned int nid, 3827void __meminit get_pfn_range_for_nid(unsigned int nid,
3975 unsigned long *start_pfn, unsigned long *end_pfn) 3828 unsigned long *start_pfn, unsigned long *end_pfn)
3976{ 3829{
3830 unsigned long this_start_pfn, this_end_pfn;
3977 int i; 3831 int i;
3832
3978 *start_pfn = -1UL; 3833 *start_pfn = -1UL;
3979 *end_pfn = 0; 3834 *end_pfn = 0;
3980 3835
3981 for_each_active_range_index_in_nid(i, nid) { 3836 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
3982 *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); 3837 *start_pfn = min(*start_pfn, this_start_pfn);
3983 *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); 3838 *end_pfn = max(*end_pfn, this_end_pfn);
3984 } 3839 }
3985 3840
3986 if (*start_pfn == -1UL) 3841 if (*start_pfn == -1UL)
@@ -4083,46 +3938,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
4083 unsigned long range_start_pfn, 3938 unsigned long range_start_pfn,
4084 unsigned long range_end_pfn) 3939 unsigned long range_end_pfn)
4085{ 3940{
4086 int i = 0; 3941 unsigned long nr_absent = range_end_pfn - range_start_pfn;
4087 unsigned long prev_end_pfn = 0, hole_pages = 0; 3942 unsigned long start_pfn, end_pfn;
4088 unsigned long start_pfn; 3943 int i;
4089
4090 /* Find the end_pfn of the first active range of pfns in the node */
4091 i = first_active_region_index_in_nid(nid);
4092 if (i == -1)
4093 return 0;
4094
4095 prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4096
4097 /* Account for ranges before physical memory on this node */
4098 if (early_node_map[i].start_pfn > range_start_pfn)
4099 hole_pages = prev_end_pfn - range_start_pfn;
4100
4101 /* Find all holes for the zone within the node */
4102 for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
4103
4104 /* No need to continue if prev_end_pfn is outside the zone */
4105 if (prev_end_pfn >= range_end_pfn)
4106 break;
4107
4108 /* Make sure the end of the zone is not within the hole */
4109 start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4110 prev_end_pfn = max(prev_end_pfn, range_start_pfn);
4111 3944
4112 		/* Update the hole size count and move on */ 3945 	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4113 if (start_pfn > range_start_pfn) { 3946 start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
4114 BUG_ON(prev_end_pfn > start_pfn); 3947 end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
4115 hole_pages += start_pfn - prev_end_pfn; 3948 nr_absent -= end_pfn - start_pfn;
4116 }
4117 prev_end_pfn = early_node_map[i].end_pfn;
4118 } 3949 }
4119 3950 return nr_absent;
4120 /* Account for ranges past physical memory on this node */
4121 if (range_end_pfn > prev_end_pfn)
4122 hole_pages += range_end_pfn -
4123 max(range_start_pfn, prev_end_pfn);
4124
4125 return hole_pages;
4126} 3951}
4127 3952
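
The rewritten __absent_pages_in_range() above starts from the assumption that the whole query range is absent and then subtracts each registered range after clamping it into the query window, so holes fall out of the arithmetic instead of being tracked explicitly. A minimal userspace C sketch of that accounting, assuming non-overlapping present[] ranges and a local clamp() macro with the same effect as the kernel's:

#include <stdio.h>

/* Illustrative clamp(): same effect as the kernel's clamp() macro. */
#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

struct pfn_range { unsigned long start_pfn, end_pfn; };

static const struct pfn_range present[] = {
	{ 0x000, 0x100 },	/* memory ... */
	{ 0x300, 0x400 },	/* ... with a hole at [0x100, 0x300) */
};
static const int nr_present = sizeof(present) / sizeof(present[0]);

/* Pages in [range_start, range_end) not backed by any present range. */
static unsigned long absent_pages(unsigned long range_start,
				  unsigned long range_end)
{
	unsigned long nr_absent = range_end - range_start;
	int i;

	for (i = 0; i < nr_present; i++) {
		unsigned long s = clamp(present[i].start_pfn, range_start, range_end);
		unsigned long e = clamp(present[i].end_pfn, range_start, range_end);

		nr_absent -= e - s;	/* overlap with the query range */
	}
	return nr_absent;
}

int main(void)
{
	/* 0x400-page zone minus 0x200 present pages -> 0x200 absent. */
	printf("%#lx\n", absent_pages(0x000, 0x400));
	return 0;
}
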
4128/** 3953/**
@@ -4143,14 +3968,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4143 unsigned long zone_type, 3968 unsigned long zone_type,
4144 unsigned long *ignored) 3969 unsigned long *ignored)
4145{ 3970{
3971 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
3972 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4146 unsigned long node_start_pfn, node_end_pfn; 3973 unsigned long node_start_pfn, node_end_pfn;
4147 unsigned long zone_start_pfn, zone_end_pfn; 3974 unsigned long zone_start_pfn, zone_end_pfn;
4148 3975
4149 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); 3976 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4150 zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], 3977 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4151 node_start_pfn); 3978 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4152 zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
4153 node_end_pfn);
4154 3979
4155 adjust_zone_range_for_zone_movable(nid, zone_type, 3980 adjust_zone_range_for_zone_movable(nid, zone_type,
4156 node_start_pfn, node_end_pfn, 3981 node_start_pfn, node_end_pfn,
@@ -4158,7 +3983,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4158 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 3983 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4159} 3984}
4160 3985
4161#else 3986#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4162static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, 3987static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4163 unsigned long zone_type, 3988 unsigned long zone_type,
4164 unsigned long *zones_size) 3989 unsigned long *zones_size)
@@ -4176,7 +4001,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4176 return zholes_size[zone_type]; 4001 return zholes_size[zone_type];
4177} 4002}
4178 4003
4179#endif 4004#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4180 4005
4181static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, 4006static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4182 unsigned long *zones_size, unsigned long *zholes_size) 4007 unsigned long *zones_size, unsigned long *zholes_size)
@@ -4399,10 +4224,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
4399 */ 4224 */
4400 if (pgdat == NODE_DATA(0)) { 4225 if (pgdat == NODE_DATA(0)) {
4401 mem_map = NODE_DATA(0)->node_mem_map; 4226 mem_map = NODE_DATA(0)->node_mem_map;
4402#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4227#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4403 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 4228 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
4404 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); 4229 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
4405#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4230#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4406 } 4231 }
4407#endif 4232#endif
4408#endif /* CONFIG_FLAT_NODE_MEM_MAP */ 4233#endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4427,7 +4252,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4427 free_area_init_core(pgdat, zones_size, zholes_size); 4252 free_area_init_core(pgdat, zones_size, zholes_size);
4428} 4253}
4429 4254
4430#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4255#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4431 4256
4432#if MAX_NUMNODES > 1 4257#if MAX_NUMNODES > 1
4433/* 4258/*
@@ -4449,170 +4274,6 @@ static inline void setup_nr_node_ids(void)
4449#endif 4274#endif
4450 4275
4451/** 4276/**
4452 * add_active_range - Register a range of PFNs backed by physical memory
4453 * @nid: The node ID the range resides on
4454 * @start_pfn: The start PFN of the available physical memory
4455 * @end_pfn: The end PFN of the available physical memory
4456 *
4457 * These ranges are stored in an early_node_map[] and later used by
4458 * free_area_init_nodes() to calculate zone sizes and holes. If the
4459 * range spans a memory hole, it is up to the architecture to ensure
4460 * the memory is not freed by the bootmem allocator. If possible
4461 * the range being registered will be merged with existing ranges.
4462 */
4463void __init add_active_range(unsigned int nid, unsigned long start_pfn,
4464 unsigned long end_pfn)
4465{
4466 int i;
4467
4468 mminit_dprintk(MMINIT_TRACE, "memory_register",
4469 "Entering add_active_range(%d, %#lx, %#lx) "
4470 "%d entries of %d used\n",
4471 nid, start_pfn, end_pfn,
4472 nr_nodemap_entries, MAX_ACTIVE_REGIONS);
4473
4474 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
4475
4476 /* Merge with existing active regions if possible */
4477 for (i = 0; i < nr_nodemap_entries; i++) {
4478 if (early_node_map[i].nid != nid)
4479 continue;
4480
4481 /* Skip if an existing region covers this new one */
4482 if (start_pfn >= early_node_map[i].start_pfn &&
4483 end_pfn <= early_node_map[i].end_pfn)
4484 return;
4485
4486 /* Merge forward if suitable */
4487 if (start_pfn <= early_node_map[i].end_pfn &&
4488 end_pfn > early_node_map[i].end_pfn) {
4489 early_node_map[i].end_pfn = end_pfn;
4490 return;
4491 }
4492
4493 /* Merge backward if suitable */
4494 if (start_pfn < early_node_map[i].start_pfn &&
4495 end_pfn >= early_node_map[i].start_pfn) {
4496 early_node_map[i].start_pfn = start_pfn;
4497 return;
4498 }
4499 }
4500
4501 /* Check that early_node_map is large enough */
4502 if (i >= MAX_ACTIVE_REGIONS) {
4503 printk(KERN_CRIT "More than %d memory regions, truncating\n",
4504 MAX_ACTIVE_REGIONS);
4505 return;
4506 }
4507
4508 early_node_map[i].nid = nid;
4509 early_node_map[i].start_pfn = start_pfn;
4510 early_node_map[i].end_pfn = end_pfn;
4511 nr_nodemap_entries = i + 1;
4512}
4513
4514/**
4515 * remove_active_range - Shrink an existing registered range of PFNs
4516 * @nid: The node id of the range that should be shrunk
4517 * @start_pfn: The new start PFN of the range
4518 * @end_pfn: The new end PFN of the range
4519 *
4520 * i386 with NUMA uses alloc_remap() to store a node_mem_map on a local node.
4521 * The map is kept near the end physical page range that has already been
4522 * registered. This function allows an arch to shrink an existing registered
4523 * range.
4524 */
4525void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
4526 unsigned long end_pfn)
4527{
4528 int i, j;
4529 int removed = 0;
4530
4531 printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
4532 nid, start_pfn, end_pfn);
4533
4534 /* Find the old active region end and shrink */
4535 for_each_active_range_index_in_nid(i, nid) {
4536 if (early_node_map[i].start_pfn >= start_pfn &&
4537 early_node_map[i].end_pfn <= end_pfn) {
4538 /* clear it */
4539 early_node_map[i].start_pfn = 0;
4540 early_node_map[i].end_pfn = 0;
4541 removed = 1;
4542 continue;
4543 }
4544 if (early_node_map[i].start_pfn < start_pfn &&
4545 early_node_map[i].end_pfn > start_pfn) {
4546 unsigned long temp_end_pfn = early_node_map[i].end_pfn;
4547 early_node_map[i].end_pfn = start_pfn;
4548 if (temp_end_pfn > end_pfn)
4549 add_active_range(nid, end_pfn, temp_end_pfn);
4550 continue;
4551 }
4552 if (early_node_map[i].start_pfn >= start_pfn &&
4553 early_node_map[i].end_pfn > end_pfn &&
4554 early_node_map[i].start_pfn < end_pfn) {
4555 early_node_map[i].start_pfn = end_pfn;
4556 continue;
4557 }
4558 }
4559
4560 if (!removed)
4561 return;
4562
4563 /* remove the blank ones */
4564 for (i = nr_nodemap_entries - 1; i > 0; i--) {
4565 if (early_node_map[i].nid != nid)
4566 continue;
4567 if (early_node_map[i].end_pfn)
4568 continue;
4569 /* we found it, get rid of it */
4570 for (j = i; j < nr_nodemap_entries - 1; j++)
4571 memcpy(&early_node_map[j], &early_node_map[j+1],
4572 sizeof(early_node_map[j]));
4573 j = nr_nodemap_entries - 1;
4574 memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
4575 nr_nodemap_entries--;
4576 }
4577}
4578
4579/**
4580 * remove_all_active_ranges - Remove all currently registered regions
4581 *
4582 * During discovery, it may be found that a table like SRAT is invalid
4583 * and an alternative discovery method must be used. This function removes
4584 * all currently registered regions.
4585 */
4586void __init remove_all_active_ranges(void)
4587{
4588 memset(early_node_map, 0, sizeof(early_node_map));
4589 nr_nodemap_entries = 0;
4590}
4591
4592/* Compare two active node_active_regions */
4593static int __init cmp_node_active_region(const void *a, const void *b)
4594{
4595 struct node_active_region *arange = (struct node_active_region *)a;
4596 struct node_active_region *brange = (struct node_active_region *)b;
4597
4598 /* Done this way to avoid overflows */
4599 if (arange->start_pfn > brange->start_pfn)
4600 return 1;
4601 if (arange->start_pfn < brange->start_pfn)
4602 return -1;
4603
4604 return 0;
4605}
4606
4607/* sort the node_map by start_pfn */
4608void __init sort_node_map(void)
4609{
4610 sort(early_node_map, (size_t)nr_nodemap_entries,
4611 sizeof(struct node_active_region),
4612 cmp_node_active_region, NULL);
4613}
4614
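
add_active_range(), removed above, tried to absorb a newly registered range into an existing early_node_map[] entry (skip it if fully covered, extend an entry forward or backward if they touch) and only appended a new entry as a last resort; memblock's own region handling now takes over that job. A simplified userspace sketch of those merge decisions, with node ids, sorting and overflow reporting omitted and all names illustrative:

#include <stdio.h>

#define MAX_REGIONS 8	/* illustrative capacity */

struct region { unsigned long start_pfn, end_pfn; };

static struct region map[MAX_REGIONS];
static int nr_regions;

/* Register [start, end): absorb into an existing region when possible. */
static void add_range(unsigned long start, unsigned long end)
{
	int i;

	for (i = 0; i < nr_regions; i++) {
		/* Already fully covered by an existing region. */
		if (start >= map[i].start_pfn && end <= map[i].end_pfn)
			return;
		/* Extends an existing region forward. */
		if (start <= map[i].end_pfn && end > map[i].end_pfn) {
			map[i].end_pfn = end;
			return;
		}
		/* Extends an existing region backward. */
		if (start < map[i].start_pfn && end >= map[i].start_pfn) {
			map[i].start_pfn = start;
			return;
		}
	}
	if (nr_regions < MAX_REGIONS)	/* disjoint: append a new entry */
		map[nr_regions++] = (struct region){ start, end };
}

int main(void)
{
	int i;

	add_range(0x000, 0x100);
	add_range(0x100, 0x200);	/* merges forward with the first entry */
	add_range(0x400, 0x500);	/* disjoint: new entry */
	for (i = 0; i < nr_regions; i++)
		printf("[%#lx, %#lx)\n", map[i].start_pfn, map[i].end_pfn);
	return 0;	/* prints [0, 0x200) and [0x400, 0x500) */
}
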
4615/**
4616 * node_map_pfn_alignment - determine the maximum internode alignment 4277 * node_map_pfn_alignment - determine the maximum internode alignment
4617 * 4278 *
4618 * This function should be called after node map is populated and sorted. 4279 * This function should be called after node map is populated and sorted.
@@ -4634,15 +4295,11 @@ void __init sort_node_map(void)
4634unsigned long __init node_map_pfn_alignment(void) 4295unsigned long __init node_map_pfn_alignment(void)
4635{ 4296{
4636 unsigned long accl_mask = 0, last_end = 0; 4297 unsigned long accl_mask = 0, last_end = 0;
4298 unsigned long start, end, mask;
4637 int last_nid = -1; 4299 int last_nid = -1;
4638 int i; 4300 int i, nid;
4639
4640 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4641 int nid = early_node_map[i].nid;
4642 unsigned long start = early_node_map[i].start_pfn;
4643 unsigned long end = early_node_map[i].end_pfn;
4644 unsigned long mask;
4645 4301
4302 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4646 if (!start || last_nid < 0 || last_nid == nid) { 4303 if (!start || last_nid < 0 || last_nid == nid) {
4647 last_nid = nid; 4304 last_nid = nid;
4648 last_end = end; 4305 last_end = end;
@@ -4669,12 +4326,12 @@ unsigned long __init node_map_pfn_alignment(void)
4669/* Find the lowest pfn for a node */ 4326/* Find the lowest pfn for a node */
4670static unsigned long __init find_min_pfn_for_node(int nid) 4327static unsigned long __init find_min_pfn_for_node(int nid)
4671{ 4328{
4672 int i;
4673 unsigned long min_pfn = ULONG_MAX; 4329 unsigned long min_pfn = ULONG_MAX;
4330 unsigned long start_pfn;
4331 int i;
4674 4332
4675 /* Assuming a sorted map, the first range found has the starting pfn */ 4333 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4676 for_each_active_range_index_in_nid(i, nid) 4334 min_pfn = min(min_pfn, start_pfn);
4677 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
4678 4335
4679 if (min_pfn == ULONG_MAX) { 4336 if (min_pfn == ULONG_MAX) {
4680 printk(KERN_WARNING 4337 printk(KERN_WARNING
@@ -4703,15 +4360,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
4703 */ 4360 */
4704static unsigned long __init early_calculate_totalpages(void) 4361static unsigned long __init early_calculate_totalpages(void)
4705{ 4362{
4706 int i;
4707 unsigned long totalpages = 0; 4363 unsigned long totalpages = 0;
4364 unsigned long start_pfn, end_pfn;
4365 int i, nid;
4366
4367 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4368 unsigned long pages = end_pfn - start_pfn;
4708 4369
4709 for (i = 0; i < nr_nodemap_entries; i++) {
4710 unsigned long pages = early_node_map[i].end_pfn -
4711 early_node_map[i].start_pfn;
4712 totalpages += pages; 4370 totalpages += pages;
4713 if (pages) 4371 if (pages)
4714 node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); 4372 node_set_state(nid, N_HIGH_MEMORY);
4715 } 4373 }
4716 return totalpages; 4374 return totalpages;
4717} 4375}
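
early_calculate_totalpages() above now just sums the size of every registered PFN range and marks each node that contributes at least one page. A compact userspace sketch of that pass, using a plain bitmask where the kernel calls node_set_state(nid, N_HIGH_MEMORY); all names are illustrative:

#include <stdio.h>

struct pfn_range { unsigned long start_pfn, end_pfn; int nid; };

static const struct pfn_range ranges[] = {
	{ 0x000, 0x100, 0 },
	{ 0x200, 0x200, 1 },	/* empty range: node 1 contributes nothing */
	{ 0x300, 0x500, 2 },
};
static const int nr_ranges = sizeof(ranges) / sizeof(ranges[0]);

int main(void)
{
	unsigned long totalpages = 0;
	unsigned int nodes_with_memory = 0;	/* bit n set => node n has pages */
	int i;

	for (i = 0; i < nr_ranges; i++) {
		unsigned long pages = ranges[i].end_pfn - ranges[i].start_pfn;

		totalpages += pages;
		if (pages)
			nodes_with_memory |= 1u << ranges[i].nid;
	}
	printf("total %#lx, node mask %#x\n", totalpages, nodes_with_memory);
	return 0;	/* total 0x300, node mask 0x5 (nodes 0 and 2) */
}
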
@@ -4766,6 +4424,8 @@ restart:
4766 /* Spread kernelcore memory as evenly as possible throughout nodes */ 4424 /* Spread kernelcore memory as evenly as possible throughout nodes */
4767 kernelcore_node = required_kernelcore / usable_nodes; 4425 kernelcore_node = required_kernelcore / usable_nodes;
4768 for_each_node_state(nid, N_HIGH_MEMORY) { 4426 for_each_node_state(nid, N_HIGH_MEMORY) {
4427 unsigned long start_pfn, end_pfn;
4428
4769 /* 4429 /*
4770 * Recalculate kernelcore_node if the division per node 4430 * Recalculate kernelcore_node if the division per node
4771 * now exceeds what is necessary to satisfy the requested 4431 * now exceeds what is necessary to satisfy the requested
@@ -4782,13 +4442,10 @@ restart:
4782 kernelcore_remaining = kernelcore_node; 4442 kernelcore_remaining = kernelcore_node;
4783 4443
4784 /* Go through each range of PFNs within this node */ 4444 /* Go through each range of PFNs within this node */
4785 for_each_active_range_index_in_nid(i, nid) { 4445 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4786 unsigned long start_pfn, end_pfn;
4787 unsigned long size_pages; 4446 unsigned long size_pages;
4788 4447
4789 start_pfn = max(early_node_map[i].start_pfn, 4448 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4790 zone_movable_pfn[nid]);
4791 end_pfn = early_node_map[i].end_pfn;
4792 if (start_pfn >= end_pfn) 4449 if (start_pfn >= end_pfn)
4793 continue; 4450 continue;
4794 4451
@@ -4890,11 +4547,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4890 */ 4547 */
4891void __init free_area_init_nodes(unsigned long *max_zone_pfn) 4548void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4892{ 4549{
4893 unsigned long nid; 4550 unsigned long start_pfn, end_pfn;
4894 int i; 4551 int i, nid;
4895
4896 /* Sort early_node_map as initialisation assumes it is sorted */
4897 sort_node_map();
4898 4552
4899 /* Record where the zone boundaries are */ 4553 /* Record where the zone boundaries are */
4900 memset(arch_zone_lowest_possible_pfn, 0, 4554 memset(arch_zone_lowest_possible_pfn, 0,
@@ -4941,11 +4595,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4941 } 4595 }
4942 4596
4943 /* Print out the early_node_map[] */ 4597 /* Print out the early_node_map[] */
4944 printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); 4598 printk("Early memory PFN ranges\n");
4945 for (i = 0; i < nr_nodemap_entries; i++) 4599 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4946 printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, 4600 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
4947 early_node_map[i].start_pfn,
4948 early_node_map[i].end_pfn);
4949 4601
4950 /* Initialise every node */ 4602 /* Initialise every node */
4951 mminit_verify_pageflags_layout(); 4603 mminit_verify_pageflags_layout();
@@ -4998,7 +4650,7 @@ static int __init cmdline_parse_movablecore(char *p)
4998early_param("kernelcore", cmdline_parse_kernelcore); 4650early_param("kernelcore", cmdline_parse_kernelcore);
4999early_param("movablecore", cmdline_parse_movablecore); 4651early_param("movablecore", cmdline_parse_movablecore);
5000 4652
5001#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4653#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
5002 4654
5003/** 4655/**
5004 * set_dma_reserve - set the specified number of pages reserved in the first zone 4656 * set_dma_reserve - set the specified number of pages reserved in the first zone