aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@saeurebad.de>2008-07-24 00:28:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-24 13:47:20 -0400
commit5f2809e69c7128f86316048221cf45146f69a4a0 (patch)
tree3c958e4f2c563a0d59f577bf239fe103057d6730 /mm
parent41546c17418fba08ece978bad72a33072715b8f3 (diff)
bootmem: clean up alloc_bootmem_core
alloc_bootmem_core has become quite nasty to read over time. This is a clean rewrite that keeps the semantics. bdata->last_pos has been dropped. bdata->last_success has been renamed to hint_idx and it is now an index relative to the node's range. Since further block searching might start at this index, it is now set to the end of a succeeded allocation rather than its beginning. bdata->last_offset has been renamed to last_end_off to be more clear that it represents the ending address of the last allocation relative to the node. [y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()] Signed-off-by: Johannes Weiner <hannes@saeurebad.de> Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/bootmem.c212
1 file changed, 76 insertions, 136 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 300d126ec533..94ea612deccf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
242 * considered reserved. 242 * considered reserved.
243 */ 243 */
244 244
245 if (addr >= bdata->node_boot_start && addr < bdata->last_success) 245 if (addr >= bdata->node_boot_start &&
246 bdata->last_success = addr; 246 PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx)
247 bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start);
247 248
248 /* 249 /*
249 * Round up to index to the range. 250 * Round up to index to the range.
@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
431} 432}
432#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 433#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
433 434
434/* 435static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
435 * We 'merge' subsequent allocations to save space. We might 'lose' 436 unsigned long size, unsigned long align,
436 * some fraction of a page if allocations cannot be satisfied due to 437 unsigned long goal, unsigned long limit)
437 * size constraints on boxes where there is physical RAM space
438 * fragmentation - in these cases (mostly large memory boxes) this
439 * is not a problem.
440 *
441 * On low memory boxes we get it right in 100% of the cases.
442 *
443 * alignment has to be a power of 2 value.
444 *
445 * NOTE: This function is _not_ reentrant.
446 */
447static void * __init
448alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
449 unsigned long align, unsigned long goal, unsigned long limit)
450{ 438{
451 unsigned long areasize, preferred; 439 unsigned long min, max, start, sidx, midx, step;
452 unsigned long i, start = 0, incr, eidx, end_pfn; 440
453 void *ret; 441 BUG_ON(!size);
454 unsigned long node_boot_start; 442 BUG_ON(align & (align - 1));
455 void *node_bootmem_map; 443 BUG_ON(limit && goal + size > limit);
456
457 if (!size) {
458 printk("alloc_bootmem_core(): zero-sized request\n");
459 BUG();
460 }
461 BUG_ON(align & (align-1));
462 444
463 /* on nodes without memory - bootmem_map is NULL */
464 if (!bdata->node_bootmem_map) 445 if (!bdata->node_bootmem_map)
465 return NULL; 446 return NULL;
466 447
@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
468 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, 449 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
469 align, goal, limit); 450 align, goal, limit);
470 451
471 /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ 452 min = PFN_DOWN(bdata->node_boot_start);
472 node_boot_start = bdata->node_boot_start; 453 max = bdata->node_low_pfn;
473 node_bootmem_map = bdata->node_bootmem_map;
474 if (align) {
475 node_boot_start = ALIGN(bdata->node_boot_start, align);
476 if (node_boot_start > bdata->node_boot_start)
477 node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
478 PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
479 }
480 454
481 if (limit && node_boot_start >= limit) 455 goal >>= PAGE_SHIFT;
456 limit >>= PAGE_SHIFT;
457
458 if (limit && max > limit)
459 max = limit;
460 if (max <= min)
482 return NULL; 461 return NULL;
483 462
484 end_pfn = bdata->node_low_pfn; 463 step = max(align >> PAGE_SHIFT, 1UL);
485 limit = PFN_DOWN(limit);
486 if (limit && end_pfn > limit)
487 end_pfn = limit;
488 464
489 eidx = end_pfn - PFN_DOWN(node_boot_start); 465 if (goal && min < goal && goal < max)
466 start = ALIGN(goal, step);
467 else
468 start = ALIGN(min, step);
490 469
491 /* 470 sidx = start - PFN_DOWN(bdata->node_boot_start);
492 * We try to allocate bootmem pages above 'goal' 471 midx = max - PFN_DOWN(bdata->node_boot_start);
493 * first, then we try to allocate lower pages.
494 */
495 preferred = 0;
496 if (goal && PFN_DOWN(goal) < end_pfn) {
497 if (goal > node_boot_start)
498 preferred = goal - node_boot_start;
499
500 if (bdata->last_success > node_boot_start &&
501 bdata->last_success - node_boot_start >= preferred)
502 if (!limit || (limit && limit > bdata->last_success))
503 preferred = bdata->last_success - node_boot_start;
504 }
505 472
506 preferred = PFN_DOWN(ALIGN(preferred, align)); 473 if (bdata->hint_idx > sidx) {
507 areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; 474 /* Make sure we retry on failure */
508 incr = align >> PAGE_SHIFT ? : 1; 475 goal = 1;
476 sidx = ALIGN(bdata->hint_idx, step);
477 }
509 478
510restart_scan: 479 while (1) {
511 for (i = preferred; i < eidx;) { 480 int merge;
512 unsigned long j; 481 void *region;
482 unsigned long eidx, i, start_off, end_off;
483find_block:
484 sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
485 sidx = ALIGN(sidx, step);
486 eidx = sidx + PFN_UP(size);
513 487
514 i = find_next_zero_bit(node_bootmem_map, eidx, i); 488 if (sidx >= midx || eidx > midx)
515 i = ALIGN(i, incr);
516 if (i >= eidx)
517 break; 489 break;
518 if (test_bit(i, node_bootmem_map)) {
519 i += incr;
520 continue;
521 }
522 for (j = i + 1; j < i + areasize; ++j) {
523 if (j >= eidx)
524 goto fail_block;
525 if (test_bit(j, node_bootmem_map))
526 goto fail_block;
527 }
528 start = i;
529 goto found;
530 fail_block:
531 i = ALIGN(j, incr);
532 if (i == j)
533 i += incr;
534 }
535
536 if (preferred > 0) {
537 preferred = 0;
538 goto restart_scan;
539 }
540 return NULL;
541 490
542found: 491 for (i = sidx; i < eidx; i++)
543 bdata->last_success = PFN_PHYS(start) + node_boot_start; 492 if (test_bit(i, bdata->node_bootmem_map)) {
544 BUG_ON(start >= eidx); 493 sidx = ALIGN(i, step);
494 if (sidx == i)
495 sidx += step;
496 goto find_block;
497 }
545 498
546 /* 499 if (bdata->last_end_off &&
547 * Is the next page of the previous allocation-end the start 500 PFN_DOWN(bdata->last_end_off) + 1 == sidx)
548 * of this allocation's buffer? If yes then we can 'merge' 501 start_off = ALIGN(bdata->last_end_off, align);
549 * the previous partial page with this allocation. 502 else
550 */ 503 start_off = PFN_PHYS(sidx);
551 if (align < PAGE_SIZE && 504
552 bdata->last_offset && bdata->last_pos+1 == start) { 505 merge = PFN_DOWN(start_off) < sidx;
553 unsigned long offset, remaining_size; 506 end_off = start_off + size;
554 offset = ALIGN(bdata->last_offset, align); 507
555 BUG_ON(offset > PAGE_SIZE); 508 bdata->last_end_off = end_off;
556 remaining_size = PAGE_SIZE - offset; 509 bdata->hint_idx = PFN_UP(end_off);
557 if (size < remaining_size) { 510
558 areasize = 0; 511 /*
559 /* last_pos unchanged */ 512 * Reserve the area now:
560 bdata->last_offset = offset + size; 513 */
561 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + 514 for (i = PFN_DOWN(start_off) + merge;
562 offset + node_boot_start); 515 i < PFN_UP(end_off); i++)
563 } else { 516 if (test_and_set_bit(i, bdata->node_bootmem_map))
564 remaining_size = size - remaining_size; 517 BUG();
565 areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; 518
566 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + 519 region = phys_to_virt(bdata->node_boot_start + start_off);
567 offset + node_boot_start); 520 memset(region, 0, size);
568 bdata->last_pos = start + areasize - 1; 521 return region;
569 bdata->last_offset = remaining_size;
570 }
571 bdata->last_offset &= ~PAGE_MASK;
572 } else {
573 bdata->last_pos = start + areasize - 1;
574 bdata->last_offset = size & ~PAGE_MASK;
575 ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
576 } 522 }
577 523
578 bdebug("nid=%td start=%lx end=%lx\n", 524 if (goal) {
579 bdata - bootmem_node_data, 525 goal = 0;
580 start + PFN_DOWN(bdata->node_boot_start), 526 sidx = 0;
581 start + areasize + PFN_DOWN(bdata->node_boot_start)); 527 goto find_block;
528 }
582 529
583 /* 530 return NULL;
584 * Reserve the area now:
585 */
586 for (i = start; i < start + areasize; i++)
587 if (unlikely(test_and_set_bit(i, node_bootmem_map)))
588 BUG();
589 memset(ret, 0, size);
590 return ret;
591} 531}
592 532
593/** 533/**