commit     8f7a66051b7523108c5aefb08c6a637e54aedc47
tree       212611c9ac7c4d9f0918e431043b3fc3f9fd43b6 /mm
parent     84be48d84a53044e13aa8816aab201ab5480815d
author     Benjamin Herrenschmidt <benh@kernel.crashing.org>  2011-03-22 19:33:43 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>     2011-03-22 20:44:09 -0400
mm/memblock: properly handle overlaps and fix error path
Currently, memblock_reserve() and memblock_free() don't handle overlaps of
any kind.  There is some special casing for coalescing exactly adjacent
regions, but that's about it.
This is annoying because memblock_reserve() is typically used to mark
regions passed by the firmware as reserved, and we all know how much we can
trust our firmware...
Also, with the current code, doing something it doesn't handle right, such
as calling memblock_reserve() on a large range that spans multiple existing
smaller reserved regions, or making overlapping reservations, can silently
corrupt the internal region array, causing odd errors much later on, such
as allocations returning reserved regions.
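
To illustrate, a sequence of reservations the old code could mishandle
might look like this (addresses are made up for the example, not taken
from any real firmware table):

	/* Two small ranges reported by early firmware parsing */
	memblock_reserve(0x01000000, 0x10000);
	memblock_reserve(0x01020000, 0x10000);
	/* A later table describes one big range spanning both of the
	 * above; instead of merging the overlaps, the old code could
	 * corrupt the reserved array here.
	 */
	memblock_reserve(0x00ff0000, 0x100000);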
This patch rewrites the underlying functions that add a region to, or
remove a region from, the arrays.  The new code is a lot more robust as it
fully handles overlapping regions.  It's also, imho, simpler than the
previous implementation.
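
For the general idea, here is a rough standalone sketch of overlap-aware
insertion into a sorted region array.  It is deliberately simplified: it
assumes the array has room for one more entry and ignores the arch
coalescing hook, array doubling and the empty-array filler entry that the
real code below has to deal with.

	/* Simplified sketch: merge [base, base+size) into a sorted,
	 * non-overlapping region array that has room for one more entry.
	 * Not the kernel implementation.
	 */
	struct region { unsigned long base, size; };

	static void add_range(struct region *r, int *cnt,
			      unsigned long base, unsigned long size)
	{
		unsigned long end = base + size;
		int i = 0, j;

		while (i < *cnt) {
			unsigned long rend = r[i].base + r[i].size;

			if (r[i].base > end) {
				break;	/* sorted: no further overlap possible */
			} else if (rend < base) {
				i++;	/* entirely below the new range */
			} else {
				/* Overlapping or adjacent: absorb it into [base, end) */
				if (r[i].base < base)
					base = r[i].base;
				if (rend > end)
					end = rend;
				for (j = i; j < *cnt - 1; j++)
					r[j] = r[j + 1];
				(*cnt)--;
			}
		}

		/* Shift the tail up and insert the merged range at slot i */
		for (j = *cnt; j > i; j--)
			r[j] = r[j - 1];
		r[i].base = base;
		r[i].size = end - base;
		(*cnt)++;
	}

The actual patch keeps merging in place where it can, and only falls back
to inserting a brand new entry (the new_block: path) when coalescing is
refused.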
In addition, while doing so, I found a bug: if we fail to double the array
while adding a region, we would remove the last region of the array rather
than the region we just added.  This patch fixes that too.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/memblock.c	241
1 file changed, 132 insertions(+), 109 deletions(-)
diff --git a/mm/memblock.c b/mm/memblock.c
index 4618fda975a0..a0562d1a6ad4 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -58,28 +58,6 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p
 	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
 }
 
-static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
-				phys_addr_t base2, phys_addr_t size2)
-{
-	if (base2 == base1 + size1)
-		return 1;
-	else if (base1 == base2 + size2)
-		return -1;
-
-	return 0;
-}
-
-static long __init_memblock memblock_regions_adjacent(struct memblock_type *type,
-				unsigned long r1, unsigned long r2)
-{
-	phys_addr_t base1 = type->regions[r1].base;
-	phys_addr_t size1 = type->regions[r1].size;
-	phys_addr_t base2 = type->regions[r2].base;
-	phys_addr_t size2 = type->regions[r2].size;
-
-	return memblock_addrs_adjacent(base1, size1, base2, size2);
-}
-
 long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
 {
 	unsigned long i;
@@ -206,14 +184,13 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 		type->regions[i].size = type->regions[i + 1].size;
 	}
 	type->cnt--;
-}
 
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void __init_memblock memblock_coalesce_regions(struct memblock_type *type,
-		unsigned long r1, unsigned long r2)
-{
-	type->regions[r1].size += type->regions[r2].size;
-	memblock_remove_region(type, r2);
+	/* Special case for empty arrays */
+	if (type->cnt == 0) {
+		type->cnt = 1;
+		type->regions[0].base = 0;
+		type->regions[0].size = 0;
+	}
 }
 
 /* Defined below but needed now */
@@ -276,7 +253,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
 		return 0;
 
 	/* Add the new reserved region now. Should not fail ! */
-	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0);
+	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));
 
 	/* If the array wasn't our static init one, then free it. We only do
 	 * that before SLAB is available as later on, we don't know whether
@@ -296,58 +273,99 @@ extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1
 	return 1;
 }
 
-static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock memblock_add_region(struct memblock_type *type,
+						phys_addr_t base, phys_addr_t size)
 {
-	unsigned long coalesced = 0;
-	long adjacent, i;
-
-	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
-		type->regions[0].base = base;
-		type->regions[0].size = size;
-		return 0;
-	}
+	phys_addr_t end = base + size;
+	int i, slot = -1;
 
-	/* First try and coalesce this MEMBLOCK with another. */
+	/* First try and coalesce this MEMBLOCK with others */
 	for (i = 0; i < type->cnt; i++) {
-		phys_addr_t rgnbase = type->regions[i].base;
-		phys_addr_t rgnsize = type->regions[i].size;
+		struct memblock_region *rgn = &type->regions[i];
+		phys_addr_t rend = rgn->base + rgn->size;
+
+		/* Exit if there's no possible hits */
+		if (rgn->base > end || rgn->size == 0)
+			break;
 
-		if ((rgnbase == base) && (rgnsize == size))
-			/* Already have this region, so we're done */
+		/* Check if we are fully enclosed within an existing
+		 * block
+		 */
+		if (rgn->base <= base && rend >= end)
 			return 0;
 
-		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
-		/* Check if arch allows coalescing */
-		if (adjacent != 0 && type == &memblock.memory &&
-		    !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize))
-			break;
-		if (adjacent > 0) {
-			type->regions[i].base -= size;
-			type->regions[i].size += size;
-			coalesced++;
-			break;
-		} else if (adjacent < 0) {
-			type->regions[i].size += size;
-			coalesced++;
-			break;
+		/* Check if we overlap or are adjacent with the bottom
+		 * of a block.
+		 */
+		if (base < rgn->base && end >= rgn->base) {
+			/* If we can't coalesce, create a new block */
+			if (!memblock_memory_can_coalesce(base, size,
+							  rgn->base,
+							  rgn->size)) {
+				/* Overlap & can't coalesce are mutually
+				 * exclusive, if you do that, be prepared
+				 * for trouble
+				 */
+				WARN_ON(end != rgn->base);
+				goto new_block;
+			}
+			/* We extend the bottom of the block down to our
+			 * base
+			 */
+			rgn->base = base;
+			rgn->size = rend - base;
+
+			/* Return if we have nothing else to allocate
+			 * (fully coalesced)
+			 */
+			if (rend >= end)
+				return 0;
+
+			/* We continue processing from the end of the
+			 * coalesced block.
+			 */
+			base = rend;
+			size = end - base;
+		}
+
+		/* Now check if we overlap or are adjacent with the
+		 * top of a block
+		 */
+		if (base <= rend && end >= rend) {
+			/* If we can't coalesce, create a new block */
+			if (!memblock_memory_can_coalesce(rgn->base,
+							  rgn->size,
+							  base, size)) {
+				/* Overlap & can't coalesce are mutually
+				 * exclusive, if you do that, be prepared
+				 * for trouble
+				 */
+				WARN_ON(rend != base);
+				goto new_block;
+			}
+			/* We adjust our base down to enclose the
+			 * original block and destroy it. It will be
+			 * part of our new allocation. Since we've
+			 * freed an entry, we know we won't fail
+			 * to allocate one later, so we won't risk
+			 * losing the original block allocation.
+			 */
+			size += (base - rgn->base);
+			base = rgn->base;
+			memblock_remove_region(type, i--);
 		}
 	}
 
-	/* If we plugged a hole, we may want to also coalesce with the
-	 * next region
+	/* If the array is empty, special case, replace the fake
+	 * filler region and return
 	 */
-	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) &&
-	    ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base,
-								       type->regions[i].size,
-								       type->regions[i+1].base,
-								       type->regions[i+1].size)))) {
-		memblock_coalesce_regions(type, i, i+1);
-		coalesced++;
+	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
+		type->regions[0].base = base;
+		type->regions[0].size = size;
+		return 0;
 	}
 
-	if (coalesced)
-		return coalesced;
-
+ new_block:
 	/* If we are out of space, we fail. It's too late to resize the array
 	 * but then this shouldn't have happened in the first place.
 	 */
@@ -362,13 +380,14 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
 		} else {
 			type->regions[i+1].base = base;
 			type->regions[i+1].size = size;
+			slot = i + 1;
 			break;
 		}
 	}
-
 	if (base < type->regions[0].base) {
 		type->regions[0].base = base;
 		type->regions[0].size = size;
+		slot = 0;
 	}
 	type->cnt++;
 
@@ -376,7 +395,8 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
 	 * our allocation and return an error
 	 */
 	if (type->cnt == type->max && memblock_double_array(type)) {
-		type->cnt--;
+		BUG_ON(slot < 0);
+		memblock_remove_region(type, slot);
 		return -1;
 	}
 
@@ -389,52 +409,55 @@ long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 
 }
 
-static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock __memblock_remove(struct memblock_type *type,
+					      phys_addr_t base, phys_addr_t size)
 {
-	phys_addr_t rgnbegin, rgnend;
 	phys_addr_t end = base + size;
 	int i;
 
-	rgnbegin = rgnend = 0; /* supress gcc warnings */
-
-	/* Find the region where (base, size) belongs to */
-	for (i=0; i < type->cnt; i++) {
-		rgnbegin = type->regions[i].base;
-		rgnend = rgnbegin + type->regions[i].size;
+	/* Walk through the array for collisions */
+	for (i = 0; i < type->cnt; i++) {
+		struct memblock_region *rgn = &type->regions[i];
+		phys_addr_t rend = rgn->base + rgn->size;
 
-		if ((rgnbegin <= base) && (end <= rgnend))
+		/* Nothing more to do, exit */
+		if (rgn->base > end || rgn->size == 0)
 			break;
-	}
 
-	/* Didn't find the region */
-	if (i == type->cnt)
-		return -1;
+		/* If we fully enclose the block, drop it */
+		if (base <= rgn->base && end >= rend) {
+			memblock_remove_region(type, i--);
+			continue;
+		}
 
-	/* Check to see if we are removing entire region */
-	if ((rgnbegin == base) && (rgnend == end)) {
-		memblock_remove_region(type, i);
-		return 0;
-	}
+		/* If we are fully enclosed within a block
+		 * then we need to split it and we are done
+		 */
+		if (base > rgn->base && end < rend) {
+			rgn->size = base - rgn->base;
+			if (!memblock_add_region(type, end, rend - end))
+				return 0;
+			/* Failure to split is bad, we at least
+			 * restore the block before erroring
+			 */
+			rgn->size = rend - rgn->base;
+			WARN_ON(1);
+			return -1;
+		}
 
-	/* Check to see if region is matching at the front */
-	if (rgnbegin == base) {
-		type->regions[i].base = end;
-		type->regions[i].size -= size;
-		return 0;
-	}
+		/* Check if we need to trim the bottom of a block */
+		if (rgn->base < end && rend > end) {
+			rgn->size -= end - rgn->base;
+			rgn->base = end;
+			break;
+		}
 
-	/* Check to see if the region is matching at the end */
-	if (rgnend == end) {
-		type->regions[i].size -= size;
-		return 0;
-	}
+		/* And check if we need to trim the top of a block */
+		if (base < rend)
+			rgn->size -= rend - base;
 
-	/*
-	 * We need to split the entry - adjust the current one to the
-	 * beginging of the hole and add the region after hole.
-	 */
-	type->regions[i].size = base - type->regions[i].base;
-	return memblock_add_region(type, end, rgnend - end);
+	}
+	return 0;
 }
 
 long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
@@ -467,7 +490,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph
 
 	found = memblock_find_base(size, align, 0, max_addr);
 	if (found != MEMBLOCK_ERROR &&
-	    memblock_add_region(&memblock.reserved, found, size) >= 0)
+	    !memblock_add_region(&memblock.reserved, found, size))
 		return found;
 
 	return 0;
@@ -548,7 +571,7 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
 		if (this_nid == nid) {
 			phys_addr_t ret = memblock_find_region(start, this_end, size, align);
 			if (ret != MEMBLOCK_ERROR &&
-			    memblock_add_region(&memblock.reserved, ret, size) >= 0)
+			    !memblock_add_region(&memblock.reserved, ret, size))
 				return ret;
 		}
 		start = this_end;