author    Benjamin Herrenschmidt <benh@kernel.crashing.org>	2011-03-22 19:33:43 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2011-03-22 20:44:09 -0400
commit    8f7a66051b7523108c5aefb08c6a637e54aedc47
tree      212611c9ac7c4d9f0918e431043b3fc3f9fd43b6 /mm
parent    84be48d84a53044e13aa8816aab201ab5480815d
mm/memblock: properly handle overlaps and fix error path
Currently, memblock_reserve() and memblock_free() don't handle overlaps of
any kind. There is some special casing for coalescing exactly adjacent
regions, but that's about it. This is annoying because memblock_reserve()
is typically used to mark regions passed by the firmware as reserved, and
we all know how much we can trust our firmwares...

Also, with the current code, if we do something the implementation doesn't
handle, such as memblock_reserve()'ing a large range that spans multiple
existing smaller reserved regions, or making overlapping reservations, it
can silently corrupt the internal region array, causing odd errors much
later on, such as allocations returning reserved regions.

This patch rewrites the underlying functions that add or remove a region
from the arrays. The new code fully handles overlapping regions, which
makes it a lot more robust, and is, imho, simpler than the previous
implementation.

In addition, while doing so, I found a bug where, if we fail to double the
array while adding a region, we would remove the last region of the array
rather than the one we just allocated. This patch fixes that too.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
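[Editor's illustration] To make the new semantics concrete, here is a
minimal standalone model of the overlap-aware insertion this patch
introduces. It is illustrative only, not the kernel code: the fixed-size
array, the add_region/phys_t names, and the absence of the arch coalescing
hook, the array doubling, and the error paths are all simplifications
assumed for the sketch.

	#include <stdio.h>
	#include <string.h>

	typedef unsigned long long phys_t;

	struct region { phys_t base, size; };

	static struct region regions[32];
	static int cnt;

	/* Insert [base, base+size) keeping the array sorted by base, merging
	 * every existing region that the new range overlaps or exactly abuts. */
	static void add_region(phys_t base, phys_t size)
	{
		phys_t end = base + size;
		int i = 0;

		while (i < cnt) {
			phys_t rbase = regions[i].base;
			phys_t rend  = rbase + regions[i].size;

			if (rend < base) {		/* entirely below us: skip */
				i++;
				continue;
			}
			if (rbase > end)		/* entirely above us: stop */
				break;
			/* Overlapping or adjacent: absorb the region, drop its entry */
			if (rbase < base)
				base = rbase;
			if (rend > end)
				end = rend;
			memmove(&regions[i], &regions[i + 1],
				(cnt - i - 1) * sizeof(regions[0]));
			cnt--;
		}
		/* Slot i is where the (possibly grown) region now belongs */
		memmove(&regions[i + 1], &regions[i], (cnt - i) * sizeof(regions[0]));
		regions[i].base = base;
		regions[i].size = end - base;
		cnt++;
	}

	int main(void)
	{
		add_region(0x1000, 0x1000);	/* [0x1000, 0x2000)            */
		add_region(0x3000, 0x1000);	/* [0x3000, 0x4000)            */
		add_region(0x1800, 0x2000);	/* spans both: one region left */

		for (int i = 0; i < cnt; i++)
			printf("region %d: %#llx..%#llx\n", i, regions[i].base,
			       regions[i].base + regions[i].size);
		return 0;
	}

The last call is exactly the case that used to corrupt the old array: a
reservation spanning several existing regions now collapses them into a
single entry.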
Diffstat (limited to 'mm')
 mm/memblock.c | 241 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 132 insertions(+), 109 deletions(-)
diff --git a/mm/memblock.c b/mm/memblock.c
index 4618fda975a0..a0562d1a6ad4 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -58,28 +58,6 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p
 	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
 }
 
-static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
-			       phys_addr_t base2, phys_addr_t size2)
-{
-	if (base2 == base1 + size1)
-		return 1;
-	else if (base1 == base2 + size2)
-		return -1;
-
-	return 0;
-}
-
-static long __init_memblock memblock_regions_adjacent(struct memblock_type *type,
-				 unsigned long r1, unsigned long r2)
-{
-	phys_addr_t base1 = type->regions[r1].base;
-	phys_addr_t size1 = type->regions[r1].size;
-	phys_addr_t base2 = type->regions[r2].base;
-	phys_addr_t size2 = type->regions[r2].size;
-
-	return memblock_addrs_adjacent(base1, size1, base2, size2);
-}
-
 long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
 {
 	unsigned long i;
@@ -206,14 +184,13 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 		type->regions[i].size = type->regions[i + 1].size;
 	}
 	type->cnt--;
-}
 
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void __init_memblock memblock_coalesce_regions(struct memblock_type *type,
-		unsigned long r1, unsigned long r2)
-{
-	type->regions[r1].size += type->regions[r2].size;
-	memblock_remove_region(type, r2);
+	/* Special case for empty arrays */
+	if (type->cnt == 0) {
+		type->cnt = 1;
+		type->regions[0].base = 0;
+		type->regions[0].size = 0;
+	}
 }
 
 /* Defined below but needed now */
@@ -276,7 +253,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
 		return 0;
 
 	/* Add the new reserved region now. Should not fail ! */
-	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0);
+	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));
 
 	/* If the array wasn't our static init one, then free it. We only do
 	 * that before SLAB is available as later on, we don't know whether
@@ -296,58 +273,99 @@ extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1
 	return 1;
 }
 
-static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock memblock_add_region(struct memblock_type *type,
+						phys_addr_t base, phys_addr_t size)
 {
-	unsigned long coalesced = 0;
-	long adjacent, i;
-
-	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
-		type->regions[0].base = base;
-		type->regions[0].size = size;
-		return 0;
-	}
+	phys_addr_t end = base + size;
+	int i, slot = -1;
 
-	/* First try and coalesce this MEMBLOCK with another. */
+	/* First try and coalesce this MEMBLOCK with others */
 	for (i = 0; i < type->cnt; i++) {
-		phys_addr_t rgnbase = type->regions[i].base;
-		phys_addr_t rgnsize = type->regions[i].size;
+		struct memblock_region *rgn = &type->regions[i];
+		phys_addr_t rend = rgn->base + rgn->size;
+
+		/* Exit if there's no possible hits */
+		if (rgn->base > end || rgn->size == 0)
+			break;
 
-		if ((rgnbase == base) && (rgnsize == size))
-			/* Already have this region, so we're done */
+		/* Check if we are fully enclosed within an existing
+		 * block
+		 */
+		if (rgn->base <= base && rend >= end)
 			return 0;
 
-		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
-		/* Check if arch allows coalescing */
-		if (adjacent != 0 && type == &memblock.memory &&
-		    !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize))
-			break;
-		if (adjacent > 0) {
-			type->regions[i].base -= size;
-			type->regions[i].size += size;
-			coalesced++;
-			break;
-		} else if (adjacent < 0) {
-			type->regions[i].size += size;
-			coalesced++;
-			break;
+		/* Check if we overlap or are adjacent with the bottom
+		 * of a block.
+		 */
+		if (base < rgn->base && end >= rgn->base) {
+			/* If we can't coalesce, create a new block */
+			if (!memblock_memory_can_coalesce(base, size,
+							  rgn->base,
+							  rgn->size)) {
+				/* Overlap & can't coalesce are mutually
+				 * exclusive, if you do that, be prepared
+				 * for trouble
+				 */
+				WARN_ON(end != rgn->base);
+				goto new_block;
+			}
+			/* We extend the bottom of the block down to our
+			 * base
+			 */
+			rgn->base = base;
+			rgn->size = rend - base;
+
+			/* Return if we have nothing else to allocate
+			 * (fully coalesced)
+			 */
+			if (rend >= end)
+				return 0;
+
+			/* We continue processing from the end of the
+			 * coalesced block.
+			 */
+			base = rend;
+			size = end - base;
+		}
+
+		/* Now check if we overlap or are adjacent with the
+		 * top of a block
+		 */
+		if (base <= rend && end >= rend) {
+			/* If we can't coalesce, create a new block */
+			if (!memblock_memory_can_coalesce(rgn->base,
+							  rgn->size,
+							  base, size)) {
+				/* Overlap & can't coalesce are mutually
+				 * exclusive, if you do that, be prepared
+				 * for trouble
+				 */
+				WARN_ON(rend != base);
+				goto new_block;
+			}
+			/* We adjust our base down to enclose the
+			 * original block and destroy it. It will be
+			 * part of our new allocation. Since we've
+			 * freed an entry, we know we won't fail
+			 * to allocate one later, so we won't risk
+			 * losing the original block allocation.
+			 */
+			size += (base - rgn->base);
+			base = rgn->base;
+			memblock_remove_region(type, i--);
 		}
 	}
 
-	/* If we plugged a hole, we may want to also coalesce with the
-	 * next region
+	/* If the array is empty, special case, replace the fake
+	 * filler region and return
 	 */
-	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) &&
-	    ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base,
-							       type->regions[i].size,
-							       type->regions[i+1].base,
-							       type->regions[i+1].size)))) {
-		memblock_coalesce_regions(type, i, i+1);
-		coalesced++;
+	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
+		type->regions[0].base = base;
+		type->regions[0].size = size;
+		return 0;
 	}
 
-	if (coalesced)
-		return coalesced;
-
+ new_block:
 	/* If we are out of space, we fail. It's too late to resize the array
 	 * but then this shouldn't have happened in the first place.
 	 */
@@ -362,13 +380,14 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
 		} else {
 			type->regions[i+1].base = base;
 			type->regions[i+1].size = size;
+			slot = i + 1;
 			break;
 		}
 	}
-
 	if (base < type->regions[0].base) {
 		type->regions[0].base = base;
 		type->regions[0].size = size;
+		slot = 0;
 	}
 	type->cnt++;
 
@@ -376,7 +395,8 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
 	 * our allocation and return an error
 	 */
 	if (type->cnt == type->max && memblock_double_array(type)) {
-		type->cnt--;
+		BUG_ON(slot < 0);
+		memblock_remove_region(type, slot);
 		return -1;
 	}
 
@@ -389,52 +409,55 @@ long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 
 }
 
-static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock __memblock_remove(struct memblock_type *type,
+					      phys_addr_t base, phys_addr_t size)
 {
-	phys_addr_t rgnbegin, rgnend;
 	phys_addr_t end = base + size;
 	int i;
 
-	rgnbegin = rgnend = 0; /* supress gcc warnings */
-
-	/* Find the region where (base, size) belongs to */
-	for (i=0; i < type->cnt; i++) {
-		rgnbegin = type->regions[i].base;
-		rgnend = rgnbegin + type->regions[i].size;
+	/* Walk through the array for collisions */
+	for (i = 0; i < type->cnt; i++) {
+		struct memblock_region *rgn = &type->regions[i];
+		phys_addr_t rend = rgn->base + rgn->size;
 
-		if ((rgnbegin <= base) && (end <= rgnend))
+		/* Nothing more to do, exit */
+		if (rgn->base > end || rgn->size == 0)
 			break;
-	}
 
-	/* Didn't find the region */
-	if (i == type->cnt)
-		return -1;
+		/* If we fully enclose the block, drop it */
+		if (base <= rgn->base && end >= rend) {
+			memblock_remove_region(type, i--);
+			continue;
+		}
 
-	/* Check to see if we are removing entire region */
-	if ((rgnbegin == base) && (rgnend == end)) {
-		memblock_remove_region(type, i);
-		return 0;
-	}
+		/* If we are fully enclosed within a block
+		 * then we need to split it and we are done
+		 */
+		if (base > rgn->base && end < rend) {
+			rgn->size = base - rgn->base;
+			if (!memblock_add_region(type, end, rend - end))
+				return 0;
+			/* Failure to split is bad, we at least
+			 * restore the block before erroring
+			 */
+			rgn->size = rend - rgn->base;
+			WARN_ON(1);
+			return -1;
+		}
 
-	/* Check to see if region is matching at the front */
-	if (rgnbegin == base) {
-		type->regions[i].base = end;
-		type->regions[i].size -= size;
-		return 0;
-	}
+		/* Check if we need to trim the bottom of a block */
+		if (rgn->base < end && rend > end) {
+			rgn->size -= end - rgn->base;
+			rgn->base = end;
+			break;
+		}
 
-	/* Check to see if the region is matching at the end */
-	if (rgnend == end) {
-		type->regions[i].size -= size;
-		return 0;
-	}
+		/* And check if we need to trim the top of a block */
+		if (base < rend)
+			rgn->size -= rend - base;
 
-	/*
-	 * We need to split the entry - adjust the current one to the
-	 * beginging of the hole and add the region after hole.
-	 */
-	type->regions[i].size = base - type->regions[i].base;
-	return memblock_add_region(type, end, rgnend - end);
+	}
+	return 0;
 }
 
 long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
@@ -467,7 +490,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph
 
 	found = memblock_find_base(size, align, 0, max_addr);
 	if (found != MEMBLOCK_ERROR &&
-	    memblock_add_region(&memblock.reserved, found, size) >= 0)
+	    !memblock_add_region(&memblock.reserved, found, size))
 		return found;
 
 	return 0;
@@ -548,7 +571,7 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
 		if (this_nid == nid) {
 			phys_addr_t ret = memblock_find_region(start, this_end, size, align);
 			if (ret != MEMBLOCK_ERROR &&
-			    memblock_add_region(&memblock.reserved, ret, size) >= 0)
+			    !memblock_add_region(&memblock.reserved, ret, size))
 				return ret;
 		}
 		start = this_end;
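
[Editor's illustration] For completeness, here is the removal side under
the same simplified model as the sketch after the commit message (it
reuses regions, cnt, and add_region from there; it is an illustration, not
the kernel's __memblock_remove). It walks the sorted array and handles the
same four cases as the rewritten function: drop a fully enclosed region,
split a region that strictly encloses the range, or trim a region's bottom
or top.

	/* Remove [base, base+size), which may touch any number of regions. */
	static void remove_range(phys_t base, phys_t size)
	{
		phys_t end = base + size;
		int i;

		for (i = 0; i < cnt; i++) {
			phys_t rbase = regions[i].base;
			phys_t rend  = rbase + regions[i].size;

			if (rbase >= end)	/* sorted: nothing further can hit */
				break;
			if (rend <= base)	/* entirely below the range */
				continue;
			if (base <= rbase && end >= rend) {
				/* Range fully encloses the region: drop it */
				memmove(&regions[i], &regions[i + 1],
					(cnt - i - 1) * sizeof(regions[0]));
				cnt--;
				i--;
			} else if (base > rbase && end < rend) {
				/* Range strictly inside the region: split in two */
				regions[i].size = base - rbase;
				add_region(end, rend - end);
				break;
			} else if (rbase >= base) {
				/* Range covers the bottom of the region: trim it */
				regions[i].base = end;
				regions[i].size = rend - end;
				break;
			} else {
				/* Range covers the top of the region: trim it,
				 * then keep walking into the next region */
				regions[i].size = base - rbase;
			}
		}
	}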