author | Tejun Heo <tj@kernel.org> | 2011-12-08 13:22:09 -0500
committer | Tejun Heo <tj@kernel.org> | 2011-12-08 13:22:09 -0500
commit | 7bd0b0f0da3b1ec11cbcc798eb0ef747a1184077 (patch)
tree | ef285a020ffc04250b7327f0e9876a5988aa600e /mm
parent | 0ee332c1451869963626bf9cac88f165a90990e1 (diff)
memblock: Reimplement memblock allocation using reverse free area iterator
Now that all early memory information is in memblock when it is enabled, we
can implement a reverse free area iterator and use it to implement a NUMA
aware allocator, which is then wrapped for the simpler variants, instead of
the confusing and inefficient stitching together of node information in a
separate NUMA aware allocator.
Implement for_each_free_mem_range_reverse() and use it to reimplement
memblock_find_in_range_node(), which in turn is used by all allocators.
The visible allocator interface is inconsistent and can probably use
some cleanup too.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memblock.c | 273
1 file changed, 127 insertions(+), 146 deletions(-)
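The diffstat is limited to mm/, so the wrapper macro that drives the new __next_free_mem_range_rev() helper added below lives in the companion include/linux/memblock.h change and is not shown here. As a rough sketch of how such a macro is conventionally written around the helper (an illustration, not the literal patch text):

```c
/*
 * Sketch only -- the real definition belongs to include/linux/memblock.h,
 * which is outside this mm-limited diffstat.  The u64 cursor @i packs the
 * memory-region index (low 32 bits) and the reserved-region index (high
 * 32 bits); ULLONG_MAX means both "start from the top" and "iteration done".
 */
#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
	for (i = (u64)ULLONG_MAX,					\
	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid);\
	     i != (u64)ULLONG_MAX;					\
	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
```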
diff --git a/mm/memblock.c b/mm/memblock.c
index 1adbef09b43a..2f55f19b7c86 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -79,78 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return (i < type->cnt) ? i : -1;
 }
 
-/*
- * Find, allocate, deallocate or reserve unreserved regions. All allocations
- * are top-down.
+/**
+ * memblock_find_in_range_node - find free area in given range and node
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ *
+ * Find @size free area aligned to @align in the specified range and node.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
  */
-
-static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end,
-					  phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align, int nid)
 {
-	phys_addr_t base, res_base;
-	long j;
+	phys_addr_t this_start, this_end, cand;
+	u64 i;
 
-	/* In case, huge size is requested */
-	if (end < size)
-		return 0;
+	/* align @size to avoid excessive fragmentation on reserved array */
+	size = round_up(size, align);
+
+	/* pump up @end */
+	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
+		end = memblock.current_limit;
 
-	base = round_down(end - size, align);
+	/* adjust @start to avoid underflow and allocating the first page */
+	start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+	end = max(start, end);
 
-	/* Prevent allocations returning 0 as it's also used to
-	 * indicate an allocation failure
-	 */
-	if (start == 0)
-		start = PAGE_SIZE;
-
-	while (start <= base) {
-		j = memblock_overlaps_region(&memblock.reserved, base, size);
-		if (j < 0)
-			return base;
-		res_base = memblock.reserved.regions[j].base;
-		if (res_base < size)
-			break;
-		base = round_down(res_base - size, align);
-	}
+	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+		this_start = clamp(this_start, start, end);
+		this_end = clamp(this_end, start, end);
 
+		cand = round_down(this_end - size, align);
+		if (cand >= this_start)
+			return cand;
+	}
 	return 0;
 }
 
-/*
- * Find a free area with specified alignment in a specific range.
+/**
+ * memblock_find_in_range - find free area in given range
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ *
+ * Find @size free area aligned to @align in the specified range.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, phys_addr_t end,
-					phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align)
 {
-	long i;
-
-	BUG_ON(0 == size);
-
-	/* Pump up max_addr */
-	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-		end = memblock.current_limit;
-
-	/* We do a top-down search, this tends to limit memory
-	 * fragmentation by keeping early boot allocs near the
-	 * top of memory
-	 */
-	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
-		phys_addr_t memblockbase = memblock.memory.regions[i].base;
-		phys_addr_t memblocksize = memblock.memory.regions[i].size;
-		phys_addr_t bottom, top, found;
-
-		if (memblocksize < size)
-			continue;
-		if ((memblockbase + memblocksize) <= start)
-			break;
-		bottom = max(memblockbase, start);
-		top = min(memblockbase + memblocksize, end);
-		if (bottom >= top)
-			continue;
-		found = memblock_find_region(bottom, top, size, align);
-		if (found)
-			return found;
-	}
-	return 0;
+	return memblock_find_in_range_node(start, end, size, align,
+					   MAX_NUMNODES);
 }
 
 /*
@@ -607,6 +595,70 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
 	*idx = ULLONG_MAX;
 }
 
+/**
+ * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
+ * @idx: pointer to u64 loop variable
+ * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Reverse of __next_free_mem_range().
+ */
+void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
+					   phys_addr_t *out_start,
+					   phys_addr_t *out_end, int *out_nid)
+{
+	struct memblock_type *mem = &memblock.memory;
+	struct memblock_type *rsv = &memblock.reserved;
+	int mi = *idx & 0xffffffff;
+	int ri = *idx >> 32;
+
+	if (*idx == (u64)ULLONG_MAX) {
+		mi = mem->cnt - 1;
+		ri = rsv->cnt;
+	}
+
+	for ( ; mi >= 0; mi--) {
+		struct memblock_region *m = &mem->regions[mi];
+		phys_addr_t m_start = m->base;
+		phys_addr_t m_end = m->base + m->size;
+
+		/* only memory regions are associated with nodes, check it */
+		if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+			continue;
+
+		/* scan areas before each reservation for intersection */
+		for ( ; ri >= 0; ri--) {
+			struct memblock_region *r = &rsv->regions[ri];
+			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
+			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+
+			/* if ri advanced past mi, break out to advance mi */
+			if (r_end <= m_start)
+				break;
+			/* if the two regions intersect, we're done */
+			if (m_end > r_start) {
+				if (out_start)
+					*out_start = max(m_start, r_start);
+				if (out_end)
+					*out_end = min(m_end, r_end);
+				if (out_nid)
+					*out_nid = memblock_get_region_node(m);
+
+				if (m_start >= r_start)
+					mi--;
+				else
+					ri--;
+				*idx = (u32)mi | (u64)ri << 32;
+				return;
+			}
+		}
+	}
+
+	*idx = ULLONG_MAX;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * Common iterator interface used to define for_each_mem_range().
@@ -670,22 +722,29 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+					phys_addr_t align, phys_addr_t max_addr,
+					int nid)
 {
 	phys_addr_t found;
 
-	/* We align the size to limit fragmentation. Without this, a lot of
-	 * small allocs quickly eat up the whole reserve array on sparc
-	 */
-	size = round_up(size, align);
-
-	found = memblock_find_in_range(0, max_addr, size, align);
+	found = memblock_find_in_range_node(0, max_addr, size, align, nid);
 	if (found && !memblock_reserve(found, size))
 		return found;
 
 	return 0;
 }
 
+phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
+{
+	return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+{
+	return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
+}
+
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
 	phys_addr_t alloc;
@@ -704,84 +763,6 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
-
-/*
- * Additional node-local top-down allocators.
- *
- * WARNING: Only available after early_node_map[] has been populated,
- * on some architectures, that is after all the calls to add_active_range()
- * have been done to populate it.
- */
-
-static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start,
-						 phys_addr_t end, int *nid)
-{
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-	unsigned long start_pfn, end_pfn;
-	int i;
-
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, nid)
-		if (end > PFN_PHYS(start_pfn) && end <= PFN_PHYS(end_pfn))
-			return max(start, PFN_PHYS(start_pfn));
-#endif
-	*nid = 0;
-	return start;
-}
-
-phys_addr_t __init memblock_find_in_range_node(phys_addr_t start,
-					       phys_addr_t end,
-					       phys_addr_t size,
-					       phys_addr_t align, int nid)
-{
-	struct memblock_type *mem = &memblock.memory;
-	int i;
-
-	BUG_ON(0 == size);
-
-	/* Pump up max_addr */
-	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-		end = memblock.current_limit;
-
-	for (i = mem->cnt - 1; i >= 0; i--) {
-		struct memblock_region *r = &mem->regions[i];
-		phys_addr_t base = max(start, r->base);
-		phys_addr_t top = min(end, r->base + r->size);
-
-		while (base < top) {
-			phys_addr_t tbase, ret;
-			int tnid;
-
-			tbase = memblock_nid_range_rev(base, top, &tnid);
-			if (nid == MAX_NUMNODES || tnid == nid) {
-				ret = memblock_find_region(tbase, top, size, align);
-				if (ret)
-					return ret;
-			}
-			top = tbase;
-		}
-	}
-
-	return 0;
-}
-
-phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-{
-	phys_addr_t found;
-
-	/*
-	 * We align the size to limit fragmentation. Without this, a lot of
-	 * small allocs quickly eat up the whole reserve array on sparc
-	 */
-	size = round_up(size, align);
-
-	found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE,
-					    size, align, nid);
-	if (found && !memblock_reserve(found, size))
-		return found;
-
-	return 0;
-}
-
 phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	phys_addr_t res = memblock_alloc_nid(size, align, nid);
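With the old node-local allocators removed, the visible call chain becomes memblock_alloc_nid() / __memblock_alloc_base() -> memblock_alloc_base_nid() -> memblock_find_in_range_node(), which walks free ranges top-down via the new iterator. A hypothetical helper (not part of the patch; the function name and pr_info() output are made up for illustration) shows how the iterator can be consumed directly:

```c
/*
 * Hypothetical example, not from the patch: dump every free
 * (memory minus reserved) range on @nid, from the top of memory down,
 * using the reverse iterator introduced above.
 */
static void __init example_dump_free_ranges(int nid)
{
	phys_addr_t this_start, this_end;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL)
		pr_info("free range: [%#016llx-%#016llx)\n",
			(unsigned long long)this_start,
			(unsigned long long)this_end);
}
```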