diff options
author | Johannes Weiner <hannes@saeurebad.de> | 2008-07-24 00:28:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:20 -0400 |
commit | 5f2809e69c7128f86316048221cf45146f69a4a0 (patch) | |
tree | 3c958e4f2c563a0d59f577bf239fe103057d6730 /mm/bootmem.c | |
parent | 41546c17418fba08ece978bad72a33072715b8f3 (diff) |
bootmem: clean up alloc_bootmem_core
alloc_bootmem_core has become quite nasty to read over time. This is a
clean rewrite that keeps the semantics.
bdata->last_pos has been dropped.
bdata->last_success has been renamed to hint_idx and it is now an index
relative to the node's range. Since further block searching might start
at this index, it is now set to the end of a succeeded allocation rather
than its beginning.
bdata->last_offset has been renamed to last_end_off to be more clear that
it represents the ending address of the last allocation relative to the
node.
[y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/bootmem.c')
-rw-r--r-- | mm/bootmem.c | 212 |
1 files changed, 76 insertions, 136 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index 300d126ec533..94ea612deccf 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, | |||
242 | * considered reserved. | 242 | * considered reserved. |
243 | */ | 243 | */ |
244 | 244 | ||
245 | if (addr >= bdata->node_boot_start && addr < bdata->last_success) | 245 | if (addr >= bdata->node_boot_start && |
246 | bdata->last_success = addr; | 246 | PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx) |
247 | bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start); | ||
247 | 248 | ||
248 | /* | 249 | /* |
249 | * Round up to index to the range. | 250 | * Round up to index to the range. |
@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, | |||
431 | } | 432 | } |
432 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ | 433 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ |
433 | 434 | ||
434 | /* | 435 | static void * __init alloc_bootmem_core(struct bootmem_data *bdata, |
435 | * We 'merge' subsequent allocations to save space. We might 'lose' | 436 | unsigned long size, unsigned long align, |
436 | * some fraction of a page if allocations cannot be satisfied due to | 437 | unsigned long goal, unsigned long limit) |
437 | * size constraints on boxes where there is physical RAM space | ||
438 | * fragmentation - in these cases (mostly large memory boxes) this | ||
439 | * is not a problem. | ||
440 | * | ||
441 | * On low memory boxes we get it right in 100% of the cases. | ||
442 | * | ||
443 | * alignment has to be a power of 2 value. | ||
444 | * | ||
445 | * NOTE: This function is _not_ reentrant. | ||
446 | */ | ||
447 | static void * __init | ||
448 | alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | ||
449 | unsigned long align, unsigned long goal, unsigned long limit) | ||
450 | { | 438 | { |
451 | unsigned long areasize, preferred; | 439 | unsigned long min, max, start, sidx, midx, step; |
452 | unsigned long i, start = 0, incr, eidx, end_pfn; | 440 | |
453 | void *ret; | 441 | BUG_ON(!size); |
454 | unsigned long node_boot_start; | 442 | BUG_ON(align & (align - 1)); |
455 | void *node_bootmem_map; | 443 | BUG_ON(limit && goal + size > limit); |
456 | |||
457 | if (!size) { | ||
458 | printk("alloc_bootmem_core(): zero-sized request\n"); | ||
459 | BUG(); | ||
460 | } | ||
461 | BUG_ON(align & (align-1)); | ||
462 | 444 | ||
463 | /* on nodes without memory - bootmem_map is NULL */ | ||
464 | if (!bdata->node_bootmem_map) | 445 | if (!bdata->node_bootmem_map) |
465 | return NULL; | 446 | return NULL; |
466 | 447 | ||
@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | |||
468 | bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, | 449 | bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, |
469 | align, goal, limit); | 450 | align, goal, limit); |
470 | 451 | ||
471 | /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ | 452 | min = PFN_DOWN(bdata->node_boot_start); |
472 | node_boot_start = bdata->node_boot_start; | 453 | max = bdata->node_low_pfn; |
473 | node_bootmem_map = bdata->node_bootmem_map; | ||
474 | if (align) { | ||
475 | node_boot_start = ALIGN(bdata->node_boot_start, align); | ||
476 | if (node_boot_start > bdata->node_boot_start) | ||
477 | node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + | ||
478 | PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; | ||
479 | } | ||
480 | 454 | ||
481 | if (limit && node_boot_start >= limit) | 455 | goal >>= PAGE_SHIFT; |
456 | limit >>= PAGE_SHIFT; | ||
457 | |||
458 | if (limit && max > limit) | ||
459 | max = limit; | ||
460 | if (max <= min) | ||
482 | return NULL; | 461 | return NULL; |
483 | 462 | ||
484 | end_pfn = bdata->node_low_pfn; | 463 | step = max(align >> PAGE_SHIFT, 1UL); |
485 | limit = PFN_DOWN(limit); | ||
486 | if (limit && end_pfn > limit) | ||
487 | end_pfn = limit; | ||
488 | 464 | ||
489 | eidx = end_pfn - PFN_DOWN(node_boot_start); | 465 | if (goal && min < goal && goal < max) |
466 | start = ALIGN(goal, step); | ||
467 | else | ||
468 | start = ALIGN(min, step); | ||
490 | 469 | ||
491 | /* | 470 | sidx = start - PFN_DOWN(bdata->node_boot_start); |
492 | * We try to allocate bootmem pages above 'goal' | 471 | midx = max - PFN_DOWN(bdata->node_boot_start); |
493 | * first, then we try to allocate lower pages. | ||
494 | */ | ||
495 | preferred = 0; | ||
496 | if (goal && PFN_DOWN(goal) < end_pfn) { | ||
497 | if (goal > node_boot_start) | ||
498 | preferred = goal - node_boot_start; | ||
499 | |||
500 | if (bdata->last_success > node_boot_start && | ||
501 | bdata->last_success - node_boot_start >= preferred) | ||
502 | if (!limit || (limit && limit > bdata->last_success)) | ||
503 | preferred = bdata->last_success - node_boot_start; | ||
504 | } | ||
505 | 472 | ||
506 | preferred = PFN_DOWN(ALIGN(preferred, align)); | 473 | if (bdata->hint_idx > sidx) { |
507 | areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; | 474 | /* Make sure we retry on failure */ |
508 | incr = align >> PAGE_SHIFT ? : 1; | 475 | goal = 1; |
476 | sidx = ALIGN(bdata->hint_idx, step); | ||
477 | } | ||
509 | 478 | ||
510 | restart_scan: | 479 | while (1) { |
511 | for (i = preferred; i < eidx;) { | 480 | int merge; |
512 | unsigned long j; | 481 | void *region; |
482 | unsigned long eidx, i, start_off, end_off; | ||
483 | find_block: | ||
484 | sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); | ||
485 | sidx = ALIGN(sidx, step); | ||
486 | eidx = sidx + PFN_UP(size); | ||
513 | 487 | ||
514 | i = find_next_zero_bit(node_bootmem_map, eidx, i); | 488 | if (sidx >= midx || eidx > midx) |
515 | i = ALIGN(i, incr); | ||
516 | if (i >= eidx) | ||
517 | break; | 489 | break; |
518 | if (test_bit(i, node_bootmem_map)) { | ||
519 | i += incr; | ||
520 | continue; | ||
521 | } | ||
522 | for (j = i + 1; j < i + areasize; ++j) { | ||
523 | if (j >= eidx) | ||
524 | goto fail_block; | ||
525 | if (test_bit(j, node_bootmem_map)) | ||
526 | goto fail_block; | ||
527 | } | ||
528 | start = i; | ||
529 | goto found; | ||
530 | fail_block: | ||
531 | i = ALIGN(j, incr); | ||
532 | if (i == j) | ||
533 | i += incr; | ||
534 | } | ||
535 | |||
536 | if (preferred > 0) { | ||
537 | preferred = 0; | ||
538 | goto restart_scan; | ||
539 | } | ||
540 | return NULL; | ||
541 | 490 | ||
542 | found: | 491 | for (i = sidx; i < eidx; i++) |
543 | bdata->last_success = PFN_PHYS(start) + node_boot_start; | 492 | if (test_bit(i, bdata->node_bootmem_map)) { |
544 | BUG_ON(start >= eidx); | 493 | sidx = ALIGN(i, step); |
494 | if (sidx == i) | ||
495 | sidx += step; | ||
496 | goto find_block; | ||
497 | } | ||
545 | 498 | ||
546 | /* | 499 | if (bdata->last_end_off && |
547 | * Is the next page of the previous allocation-end the start | 500 | PFN_DOWN(bdata->last_end_off) + 1 == sidx) |
548 | * of this allocation's buffer? If yes then we can 'merge' | 501 | start_off = ALIGN(bdata->last_end_off, align); |
549 | * the previous partial page with this allocation. | 502 | else |
550 | */ | 503 | start_off = PFN_PHYS(sidx); |
551 | if (align < PAGE_SIZE && | 504 | |
552 | bdata->last_offset && bdata->last_pos+1 == start) { | 505 | merge = PFN_DOWN(start_off) < sidx; |
553 | unsigned long offset, remaining_size; | 506 | end_off = start_off + size; |
554 | offset = ALIGN(bdata->last_offset, align); | 507 | |
555 | BUG_ON(offset > PAGE_SIZE); | 508 | bdata->last_end_off = end_off; |
556 | remaining_size = PAGE_SIZE - offset; | 509 | bdata->hint_idx = PFN_UP(end_off); |
557 | if (size < remaining_size) { | 510 | |
558 | areasize = 0; | 511 | /* |
559 | /* last_pos unchanged */ | 512 | * Reserve the area now: |
560 | bdata->last_offset = offset + size; | 513 | */ |
561 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 514 | for (i = PFN_DOWN(start_off) + merge; |
562 | offset + node_boot_start); | 515 | i < PFN_UP(end_off); i++) |
563 | } else { | 516 | if (test_and_set_bit(i, bdata->node_bootmem_map)) |
564 | remaining_size = size - remaining_size; | 517 | BUG(); |
565 | areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; | 518 | |
566 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 519 | region = phys_to_virt(bdata->node_boot_start + start_off); |
567 | offset + node_boot_start); | 520 | memset(region, 0, size); |
568 | bdata->last_pos = start + areasize - 1; | 521 | return region; |
569 | bdata->last_offset = remaining_size; | ||
570 | } | ||
571 | bdata->last_offset &= ~PAGE_MASK; | ||
572 | } else { | ||
573 | bdata->last_pos = start + areasize - 1; | ||
574 | bdata->last_offset = size & ~PAGE_MASK; | ||
575 | ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); | ||
576 | } | 522 | } |
577 | 523 | ||
578 | bdebug("nid=%td start=%lx end=%lx\n", | 524 | if (goal) { |
579 | bdata - bootmem_node_data, | 525 | goal = 0; |
580 | start + PFN_DOWN(bdata->node_boot_start), | 526 | sidx = 0; |
581 | start + areasize + PFN_DOWN(bdata->node_boot_start)); | 527 | goto find_block; |
528 | } | ||
582 | 529 | ||
583 | /* | 530 | return NULL; |
584 | * Reserve the area now: | ||
585 | */ | ||
586 | for (i = start; i < start + areasize; i++) | ||
587 | if (unlikely(test_and_set_bit(i, node_bootmem_map))) | ||
588 | BUG(); | ||
589 | memset(ret, 0, size); | ||
590 | return ret; | ||
591 | } | 531 | } |
592 | 532 | ||
593 | /** | 533 | /** |