diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-26 17:04:32 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-26 17:04:32 -0400 |
commit | c3bf9bc243092c53946fd6d8ebd6dc2f4e572d48 (patch) | |
tree | 4cabbf33e11e3a71b64394b24fe70453f41cefe8 | |
parent | e3505dd50caf54e6f81f897cb347441409974a15 (diff) | |
parent | c2b91e2eec9678dbda274e906cc32ea8f711da3b (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3:
x86_64/mm: check and print vmemmap allocation continuous
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
x86_64: make reserve_bootmem_generic() use new reserve_bootmem()
mm: allow reserve_bootmem() cross nodes
mm: offset align in alloc_bootmem()
mm: fix alloc_bootmem_core to use fast searching for all nodes
mm: make mem_map allocation continuous
-rw-r--r-- | arch/x86/kernel/e820_64.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 38 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 42 | ||||
-rw-r--r-- | include/asm-x86/e820_64.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | mm/bootmem.c | 164 | ||||
-rw-r--r-- | mm/sparse.c | 37 |
8 files changed, 228 insertions, 72 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 79f0d52fa99a..645ee5e32a27 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) | |||
106 | early_res[j - 1].end = 0; | 106 | early_res[j - 1].end = 0; |
107 | } | 107 | } |
108 | 108 | ||
109 | void __init early_res_to_bootmem(void) | 109 | void __init early_res_to_bootmem(unsigned long start, unsigned long end) |
110 | { | 110 | { |
111 | int i; | 111 | int i; |
112 | unsigned long final_start, final_end; | ||
112 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 113 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { |
113 | struct early_res *r = &early_res[i]; | 114 | struct early_res *r = &early_res[i]; |
114 | printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, | 115 | final_start = max(start, r->start); |
115 | r->start, r->end - 1, r->name); | 116 | final_end = min(end, r->end); |
116 | reserve_bootmem_generic(r->start, r->end - r->start); | 117 | if (final_start >= final_end) |
118 | continue; | ||
119 | printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, | ||
120 | final_start, final_end - 1, r->name); | ||
121 | reserve_bootmem_generic(final_start, final_end - final_start); | ||
117 | } | 122 | } |
118 | } | 123 | } |
119 | 124 | ||
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b04e2c011e1a..60e64c8eee92 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
190 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); | 190 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); |
191 | e820_register_active_regions(0, start_pfn, end_pfn); | 191 | e820_register_active_regions(0, start_pfn, end_pfn); |
192 | free_bootmem_with_active_regions(0, end_pfn); | 192 | free_bootmem_with_active_regions(0, end_pfn); |
193 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | ||
193 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 194 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
194 | } | 195 | } |
195 | #endif | 196 | #endif |
@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p) | |||
421 | contig_initmem_init(0, end_pfn); | 422 | contig_initmem_init(0, end_pfn); |
422 | #endif | 423 | #endif |
423 | 424 | ||
424 | early_res_to_bootmem(); | ||
425 | |||
426 | dma32_reserve_bootmem(); | 425 | dma32_reserve_bootmem(); |
427 | 426 | ||
428 | #ifdef CONFIG_ACPI_SLEEP | 427 | #ifdef CONFIG_ACPI_SLEEP |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0cca62663037..5fbb8652cf59 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -810,7 +810,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
810 | void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | 810 | void __init reserve_bootmem_generic(unsigned long phys, unsigned len) |
811 | { | 811 | { |
812 | #ifdef CONFIG_NUMA | 812 | #ifdef CONFIG_NUMA |
813 | int nid = phys_to_nid(phys); | 813 | int nid, next_nid; |
814 | #endif | 814 | #endif |
815 | unsigned long pfn = phys >> PAGE_SHIFT; | 815 | unsigned long pfn = phys >> PAGE_SHIFT; |
816 | 816 | ||
@@ -829,10 +829,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
829 | 829 | ||
830 | /* Should check here against the e820 map to avoid double free */ | 830 | /* Should check here against the e820 map to avoid double free */ |
831 | #ifdef CONFIG_NUMA | 831 | #ifdef CONFIG_NUMA |
832 | reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); | 832 | nid = phys_to_nid(phys); |
833 | next_nid = phys_to_nid(phys + len - 1); | ||
834 | if (nid == next_nid) | ||
835 | reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); | ||
836 | else | ||
837 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | ||
833 | #else | 838 | #else |
834 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | 839 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); |
835 | #endif | 840 | #endif |
841 | |||
836 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { | 842 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { |
837 | dma_reserve += len / PAGE_SIZE; | 843 | dma_reserve += len / PAGE_SIZE; |
838 | set_dma_reserve(dma_reserve); | 844 | set_dma_reserve(dma_reserve); |
@@ -926,6 +932,10 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
926 | /* | 932 | /* |
927 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. | 933 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. |
928 | */ | 934 | */ |
935 | static long __meminitdata addr_start, addr_end; | ||
936 | static void __meminitdata *p_start, *p_end; | ||
937 | static int __meminitdata node_start; | ||
938 | |||
929 | int __meminit | 939 | int __meminit |
930 | vmemmap_populate(struct page *start_page, unsigned long size, int node) | 940 | vmemmap_populate(struct page *start_page, unsigned long size, int node) |
931 | { | 941 | { |
@@ -960,12 +970,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
960 | PAGE_KERNEL_LARGE); | 970 | PAGE_KERNEL_LARGE); |
961 | set_pmd(pmd, __pmd(pte_val(entry))); | 971 | set_pmd(pmd, __pmd(pte_val(entry))); |
962 | 972 | ||
963 | printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", | 973 | /* check to see if we have contiguous blocks */ |
964 | addr, addr + PMD_SIZE - 1, p, node); | 974 | if (p_end != p || node_start != node) { |
975 | if (p_start) | ||
976 | printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | ||
977 | addr_start, addr_end-1, p_start, p_end-1, node_start); | ||
978 | addr_start = addr; | ||
979 | node_start = node; | ||
980 | p_start = p; | ||
981 | } | ||
982 | addr_end = addr + PMD_SIZE; | ||
983 | p_end = p + PMD_SIZE; | ||
965 | } else { | 984 | } else { |
966 | vmemmap_verify((pte_t *)pmd, node, addr, next); | 985 | vmemmap_verify((pte_t *)pmd, node, addr, next); |
967 | } | 986 | } |
968 | } | 987 | } |
969 | return 0; | 988 | return 0; |
970 | } | 989 | } |
990 | |||
991 | void __meminit vmemmap_populate_print_last(void) | ||
992 | { | ||
993 | if (p_start) { | ||
994 | printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | ||
995 | addr_start, addr_end-1, p_start, p_end-1, node_start); | ||
996 | p_start = NULL; | ||
997 | p_end = NULL; | ||
998 | node_start = 0; | ||
999 | } | ||
1000 | } | ||
971 | #endif | 1001 | #endif |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9a6892200b27..c5066d519e5d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
196 | unsigned long bootmap_start, nodedata_phys; | 196 | unsigned long bootmap_start, nodedata_phys; |
197 | void *bootmap; | 197 | void *bootmap; |
198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
199 | int nid; | ||
199 | 200 | ||
200 | start = round_up(start, ZONE_ALIGN); | 201 | start = round_up(start, ZONE_ALIGN); |
201 | 202 | ||
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
218 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
219 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; | 220 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; |
220 | 221 | ||
221 | /* Find a place for the bootmem map */ | 222 | /* |
223 | * Find a place for the bootmem map | ||
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | ||
225 | * so we need to make sure bootmap_start is not too small, otherwise | ||
226 | * early_node_mem will get that with find_e820_area instead | ||
227 | * of alloc_bootmem, that could clash with reserved range | ||
228 | */ | ||
222 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 229 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
223 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 230 | nid = phys_to_nid(nodedata_phys); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = round_up(start, PAGE_SIZE); | ||
224 | /* | 235 | /* |
225 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes | 236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes |
226 | * to use that to align to PAGE_SIZE | 237 | * to use that to align to PAGE_SIZE |
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
245 | 256 | ||
246 | free_bootmem_with_active_regions(nodeid, end); | 257 | free_bootmem_with_active_regions(nodeid, end); |
247 | 258 | ||
248 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, | 259 | /* |
249 | BOOTMEM_DEFAULT); | 260 | * convert early reserve to bootmem reserve earlier |
250 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 261 | * otherwise early_node_mem could use early reserved mem |
251 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 262 | * on previous node |
263 | */ | ||
264 | early_res_to_bootmem(start, end); | ||
265 | |||
266 | /* | ||
267 | * in some cases early_node_mem could use alloc_bootmem | ||
268 | * to get range on other node, don't reserve that again | ||
269 | */ | ||
270 | if (nid != nodeid) | ||
271 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
272 | else | ||
273 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
274 | pgdat_size, BOOTMEM_DEFAULT); | ||
275 | nid = phys_to_nid(bootmap_start); | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | ||
280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | ||
281 | |||
252 | #ifdef CONFIG_ACPI_NUMA | 282 | #ifdef CONFIG_ACPI_NUMA |
253 | srat_reserve_add_area(nodeid); | 283 | srat_reserve_add_area(nodeid); |
254 | #endif | 284 | #endif |
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index b5e02e379af3..71c4d685d30d 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h | |||
@@ -49,7 +49,7 @@ extern void update_e820(void); | |||
49 | 49 | ||
50 | extern void reserve_early(unsigned long start, unsigned long end, char *name); | 50 | extern void reserve_early(unsigned long start, unsigned long end, char *name); |
51 | extern void free_early(unsigned long start, unsigned long end); | 51 | extern void free_early(unsigned long start, unsigned long end); |
52 | extern void early_res_to_bootmem(void); | 52 | extern void early_res_to_bootmem(unsigned long start, unsigned long end); |
53 | 53 | ||
54 | #endif/*!__ASSEMBLY__*/ | 54 | #endif/*!__ASSEMBLY__*/ |
55 | 55 | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index b695875d63e3..286d31521605 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1229,6 +1229,7 @@ void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); | |||
1229 | int vmemmap_populate_basepages(struct page *start_page, | 1229 | int vmemmap_populate_basepages(struct page *start_page, |
1230 | unsigned long pages, int node); | 1230 | unsigned long pages, int node); |
1231 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); | 1231 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); |
1232 | void vmemmap_populate_print_last(void); | ||
1232 | 1233 | ||
1233 | #endif /* __KERNEL__ */ | 1234 | #endif /* __KERNEL__ */ |
1234 | #endif /* _LINUX_MM_H */ | 1235 | #endif /* _LINUX_MM_H */ |
diff --git a/mm/bootmem.c b/mm/bootmem.c index 2ccea700968f..b6791646143e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat, | |||
111 | * might be used for boot-time allocations - or it might get added | 111 | * might be used for boot-time allocations - or it might get added |
112 | * to the free page pool later on. | 112 | * to the free page pool later on. |
113 | */ | 113 | */ |
114 | static int __init reserve_bootmem_core(bootmem_data_t *bdata, | 114 | static int __init can_reserve_bootmem_core(bootmem_data_t *bdata, |
115 | unsigned long addr, unsigned long size, int flags) | 115 | unsigned long addr, unsigned long size, int flags) |
116 | { | 116 | { |
117 | unsigned long sidx, eidx; | 117 | unsigned long sidx, eidx; |
118 | unsigned long i; | 118 | unsigned long i; |
119 | int ret; | 119 | |
120 | BUG_ON(!size); | ||
121 | |||
122 | /* out of range, don't hold other */ | ||
123 | if (addr + size < bdata->node_boot_start || | ||
124 | PFN_DOWN(addr) > bdata->node_low_pfn) | ||
125 | return 0; | ||
120 | 126 | ||
121 | /* | 127 | /* |
122 | * round up, partially reserved pages are considered | 128 | * Round up to index to the range. |
123 | * fully reserved. | ||
124 | */ | 129 | */ |
130 | if (addr > bdata->node_boot_start) | ||
131 | sidx= PFN_DOWN(addr - bdata->node_boot_start); | ||
132 | else | ||
133 | sidx = 0; | ||
134 | |||
135 | eidx = PFN_UP(addr + size - bdata->node_boot_start); | ||
136 | if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) | ||
137 | eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); | ||
138 | |||
139 | for (i = sidx; i < eidx; i++) { | ||
140 | if (test_bit(i, bdata->node_bootmem_map)) { | ||
141 | if (flags & BOOTMEM_EXCLUSIVE) | ||
142 | return -EBUSY; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | return 0; | ||
147 | |||
148 | } | ||
149 | |||
150 | static void __init reserve_bootmem_core(bootmem_data_t *bdata, | ||
151 | unsigned long addr, unsigned long size, int flags) | ||
152 | { | ||
153 | unsigned long sidx, eidx; | ||
154 | unsigned long i; | ||
155 | |||
125 | BUG_ON(!size); | 156 | BUG_ON(!size); |
126 | BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); | ||
127 | BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); | ||
128 | BUG_ON(addr < bdata->node_boot_start); | ||
129 | 157 | ||
130 | sidx = PFN_DOWN(addr - bdata->node_boot_start); | 158 | /* out of range */ |
159 | if (addr + size < bdata->node_boot_start || | ||
160 | PFN_DOWN(addr) > bdata->node_low_pfn) | ||
161 | return; | ||
162 | |||
163 | /* | ||
164 | * Round up to index to the range. | ||
165 | */ | ||
166 | if (addr > bdata->node_boot_start) | ||
167 | sidx= PFN_DOWN(addr - bdata->node_boot_start); | ||
168 | else | ||
169 | sidx = 0; | ||
170 | |||
131 | eidx = PFN_UP(addr + size - bdata->node_boot_start); | 171 | eidx = PFN_UP(addr + size - bdata->node_boot_start); |
172 | if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) | ||
173 | eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); | ||
132 | 174 | ||
133 | for (i = sidx; i < eidx; i++) | 175 | for (i = sidx; i < eidx; i++) { |
134 | if (test_and_set_bit(i, bdata->node_bootmem_map)) { | 176 | if (test_and_set_bit(i, bdata->node_bootmem_map)) { |
135 | #ifdef CONFIG_DEBUG_BOOTMEM | 177 | #ifdef CONFIG_DEBUG_BOOTMEM |
136 | printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); | 178 | printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); |
137 | #endif | 179 | #endif |
138 | if (flags & BOOTMEM_EXCLUSIVE) { | ||
139 | ret = -EBUSY; | ||
140 | goto err; | ||
141 | } | ||
142 | } | 180 | } |
143 | 181 | } | |
144 | return 0; | ||
145 | |||
146 | err: | ||
147 | /* unreserve memory we accidentally reserved */ | ||
148 | for (i--; i >= sidx; i--) | ||
149 | clear_bit(i, bdata->node_bootmem_map); | ||
150 | |||
151 | return ret; | ||
152 | } | 182 | } |
153 | 183 | ||
154 | static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, | 184 | static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, |
@@ -206,9 +236,11 @@ void * __init | |||
206 | __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | 236 | __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, |
207 | unsigned long align, unsigned long goal, unsigned long limit) | 237 | unsigned long align, unsigned long goal, unsigned long limit) |
208 | { | 238 | { |
209 | unsigned long offset, remaining_size, areasize, preferred; | 239 | unsigned long areasize, preferred; |
210 | unsigned long i, start = 0, incr, eidx, end_pfn; | 240 | unsigned long i, start = 0, incr, eidx, end_pfn; |
211 | void *ret; | 241 | void *ret; |
242 | unsigned long node_boot_start; | ||
243 | void *node_bootmem_map; | ||
212 | 244 | ||
213 | if (!size) { | 245 | if (!size) { |
214 | printk("__alloc_bootmem_core(): zero-sized request\n"); | 246 | printk("__alloc_bootmem_core(): zero-sized request\n"); |
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | |||
216 | } | 248 | } |
217 | BUG_ON(align & (align-1)); | 249 | BUG_ON(align & (align-1)); |
218 | 250 | ||
219 | if (limit && bdata->node_boot_start >= limit) | ||
220 | return NULL; | ||
221 | |||
222 | /* on nodes without memory - bootmem_map is NULL */ | 251 | /* on nodes without memory - bootmem_map is NULL */ |
223 | if (!bdata->node_bootmem_map) | 252 | if (!bdata->node_bootmem_map) |
224 | return NULL; | 253 | return NULL; |
225 | 254 | ||
255 | /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ | ||
256 | node_boot_start = bdata->node_boot_start; | ||
257 | node_bootmem_map = bdata->node_bootmem_map; | ||
258 | if (align) { | ||
259 | node_boot_start = ALIGN(bdata->node_boot_start, align); | ||
260 | if (node_boot_start > bdata->node_boot_start) | ||
261 | node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + | ||
262 | PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; | ||
263 | } | ||
264 | |||
265 | if (limit && node_boot_start >= limit) | ||
266 | return NULL; | ||
267 | |||
226 | end_pfn = bdata->node_low_pfn; | 268 | end_pfn = bdata->node_low_pfn; |
227 | limit = PFN_DOWN(limit); | 269 | limit = PFN_DOWN(limit); |
228 | if (limit && end_pfn > limit) | 270 | if (limit && end_pfn > limit) |
229 | end_pfn = limit; | 271 | end_pfn = limit; |
230 | 272 | ||
231 | eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); | 273 | eidx = end_pfn - PFN_DOWN(node_boot_start); |
232 | offset = 0; | ||
233 | if (align && (bdata->node_boot_start & (align - 1UL)) != 0) | ||
234 | offset = align - (bdata->node_boot_start & (align - 1UL)); | ||
235 | offset = PFN_DOWN(offset); | ||
236 | 274 | ||
237 | /* | 275 | /* |
238 | * We try to allocate bootmem pages above 'goal' | 276 | * We try to allocate bootmem pages above 'goal' |
239 | * first, then we try to allocate lower pages. | 277 | * first, then we try to allocate lower pages. |
240 | */ | 278 | */ |
241 | if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { | 279 | preferred = 0; |
242 | preferred = goal - bdata->node_boot_start; | 280 | if (goal && PFN_DOWN(goal) < end_pfn) { |
281 | if (goal > node_boot_start) | ||
282 | preferred = goal - node_boot_start; | ||
243 | 283 | ||
244 | if (bdata->last_success >= preferred) | 284 | if (bdata->last_success > node_boot_start && |
285 | bdata->last_success - node_boot_start >= preferred) | ||
245 | if (!limit || (limit && limit > bdata->last_success)) | 286 | if (!limit || (limit && limit > bdata->last_success)) |
246 | preferred = bdata->last_success; | 287 | preferred = bdata->last_success - node_boot_start; |
247 | } else | 288 | } |
248 | preferred = 0; | ||
249 | 289 | ||
250 | preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; | 290 | preferred = PFN_DOWN(ALIGN(preferred, align)); |
251 | areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; | 291 | areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; |
252 | incr = align >> PAGE_SHIFT ? : 1; | 292 | incr = align >> PAGE_SHIFT ? : 1; |
253 | 293 | ||
254 | restart_scan: | 294 | restart_scan: |
255 | for (i = preferred; i < eidx; i += incr) { | 295 | for (i = preferred; i < eidx;) { |
256 | unsigned long j; | 296 | unsigned long j; |
257 | i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); | 297 | |
298 | i = find_next_zero_bit(node_bootmem_map, eidx, i); | ||
258 | i = ALIGN(i, incr); | 299 | i = ALIGN(i, incr); |
259 | if (i >= eidx) | 300 | if (i >= eidx) |
260 | break; | 301 | break; |
261 | if (test_bit(i, bdata->node_bootmem_map)) | 302 | if (test_bit(i, node_bootmem_map)) { |
303 | i += incr; | ||
262 | continue; | 304 | continue; |
305 | } | ||
263 | for (j = i + 1; j < i + areasize; ++j) { | 306 | for (j = i + 1; j < i + areasize; ++j) { |
264 | if (j >= eidx) | 307 | if (j >= eidx) |
265 | goto fail_block; | 308 | goto fail_block; |
266 | if (test_bit(j, bdata->node_bootmem_map)) | 309 | if (test_bit(j, node_bootmem_map)) |
267 | goto fail_block; | 310 | goto fail_block; |
268 | } | 311 | } |
269 | start = i; | 312 | start = i; |
270 | goto found; | 313 | goto found; |
271 | fail_block: | 314 | fail_block: |
272 | i = ALIGN(j, incr); | 315 | i = ALIGN(j, incr); |
316 | if (i == j) | ||
317 | i += incr; | ||
273 | } | 318 | } |
274 | 319 | ||
275 | if (preferred > offset) { | 320 | if (preferred > 0) { |
276 | preferred = offset; | 321 | preferred = 0; |
277 | goto restart_scan; | 322 | goto restart_scan; |
278 | } | 323 | } |
279 | return NULL; | 324 | return NULL; |
280 | 325 | ||
281 | found: | 326 | found: |
282 | bdata->last_success = PFN_PHYS(start); | 327 | bdata->last_success = PFN_PHYS(start) + node_boot_start; |
283 | BUG_ON(start >= eidx); | 328 | BUG_ON(start >= eidx); |
284 | 329 | ||
285 | /* | 330 | /* |
@@ -289,6 +334,7 @@ found: | |||
289 | */ | 334 | */ |
290 | if (align < PAGE_SIZE && | 335 | if (align < PAGE_SIZE && |
291 | bdata->last_offset && bdata->last_pos+1 == start) { | 336 | bdata->last_offset && bdata->last_pos+1 == start) { |
337 | unsigned long offset, remaining_size; | ||
292 | offset = ALIGN(bdata->last_offset, align); | 338 | offset = ALIGN(bdata->last_offset, align); |
293 | BUG_ON(offset > PAGE_SIZE); | 339 | BUG_ON(offset > PAGE_SIZE); |
294 | remaining_size = PAGE_SIZE - offset; | 340 | remaining_size = PAGE_SIZE - offset; |
@@ -297,14 +343,12 @@ found: | |||
297 | /* last_pos unchanged */ | 343 | /* last_pos unchanged */ |
298 | bdata->last_offset = offset + size; | 344 | bdata->last_offset = offset + size; |
299 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 345 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + |
300 | offset + | 346 | offset + node_boot_start); |
301 | bdata->node_boot_start); | ||
302 | } else { | 347 | } else { |
303 | remaining_size = size - remaining_size; | 348 | remaining_size = size - remaining_size; |
304 | areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; | 349 | areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; |
305 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 350 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + |
306 | offset + | 351 | offset + node_boot_start); |
307 | bdata->node_boot_start); | ||
308 | bdata->last_pos = start + areasize - 1; | 352 | bdata->last_pos = start + areasize - 1; |
309 | bdata->last_offset = remaining_size; | 353 | bdata->last_offset = remaining_size; |
310 | } | 354 | } |
@@ -312,14 +356,14 @@ found: | |||
312 | } else { | 356 | } else { |
313 | bdata->last_pos = start + areasize - 1; | 357 | bdata->last_pos = start + areasize - 1; |
314 | bdata->last_offset = size & ~PAGE_MASK; | 358 | bdata->last_offset = size & ~PAGE_MASK; |
315 | ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); | 359 | ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); |
316 | } | 360 | } |
317 | 361 | ||
318 | /* | 362 | /* |
319 | * Reserve the area now: | 363 | * Reserve the area now: |
320 | */ | 364 | */ |
321 | for (i = start; i < start + areasize; i++) | 365 | for (i = start; i < start + areasize; i++) |
322 | if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) | 366 | if (unlikely(test_and_set_bit(i, node_bootmem_map))) |
323 | BUG(); | 367 | BUG(); |
324 | memset(ret, 0, size); | 368 | memset(ret, 0, size); |
325 | return ret; | 369 | return ret; |
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, | |||
401 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 445 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
402 | unsigned long size, int flags) | 446 | unsigned long size, int flags) |
403 | { | 447 | { |
448 | int ret; | ||
449 | |||
450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | ||
451 | if (ret < 0) | ||
452 | return; | ||
404 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
405 | } | 454 | } |
406 | 455 | ||
@@ -426,7 +475,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | |||
426 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 475 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
427 | int flags) | 476 | int flags) |
428 | { | 477 | { |
429 | return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags); | 478 | bootmem_data_t *bdata; |
479 | int ret; | ||
480 | |||
481 | list_for_each_entry(bdata, &bdata_list, list) { | ||
482 | ret = can_reserve_bootmem_core(bdata, addr, size, flags); | ||
483 | if (ret < 0) | ||
484 | return ret; | ||
485 | } | ||
486 | list_for_each_entry(bdata, &bdata_list, list) | ||
487 | reserve_bootmem_core(bdata, addr, size, flags); | ||
488 | |||
489 | return 0; | ||
430 | } | 490 | } |
431 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ | 491 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ |
432 | 492 | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 98d6b39c3472..7e9191381f86 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -295,6 +295,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
295 | return NULL; | 295 | return NULL; |
296 | } | 296 | } |
297 | 297 | ||
298 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | ||
299 | { | ||
300 | } | ||
298 | /* | 301 | /* |
299 | * Allocate the accumulated non-linear sections, allocate a mem_map | 302 | * Allocate the accumulated non-linear sections, allocate a mem_map |
300 | * for each and record the physical to section mapping. | 303 | * for each and record the physical to section mapping. |
@@ -304,22 +307,50 @@ void __init sparse_init(void) | |||
304 | unsigned long pnum; | 307 | unsigned long pnum; |
305 | struct page *map; | 308 | struct page *map; |
306 | unsigned long *usemap; | 309 | unsigned long *usemap; |
310 | unsigned long **usemap_map; | ||
311 | int size; | ||
312 | |||
313 | /* | ||
314 | * map is using big page (aka 2M in x86 64 bit) | ||
315 | * usemap is less one page (aka 24 bytes) | ||
316 | * so alloc 2M (with 2M align) and 24 bytes in turn will | ||
317 | * make next 2M slip to one more 2M later. | ||
318 | * then in big system, the memory will have a lot of holes... | ||
319 | * here try to allocate 2M pages continuously. | ||
320 | * | ||
321 | * powerpc need to call sparse_init_one_section right after each | ||
322 | * sparse_early_mem_map_alloc, so allocate usemap_map at first. | ||
323 | */ | ||
324 | size = sizeof(unsigned long *) * NR_MEM_SECTIONS; | ||
325 | usemap_map = alloc_bootmem(size); | ||
326 | if (!usemap_map) | ||
327 | panic("can not allocate usemap_map\n"); | ||
307 | 328 | ||
308 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 329 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
309 | if (!present_section_nr(pnum)) | 330 | if (!present_section_nr(pnum)) |
310 | continue; | 331 | continue; |
332 | usemap_map[pnum] = sparse_early_usemap_alloc(pnum); | ||
333 | } | ||
311 | 334 | ||
312 | map = sparse_early_mem_map_alloc(pnum); | 335 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
313 | if (!map) | 336 | if (!present_section_nr(pnum)) |
314 | continue; | 337 | continue; |
315 | 338 | ||
316 | usemap = sparse_early_usemap_alloc(pnum); | 339 | usemap = usemap_map[pnum]; |
317 | if (!usemap) | 340 | if (!usemap) |
318 | continue; | 341 | continue; |
319 | 342 | ||
343 | map = sparse_early_mem_map_alloc(pnum); | ||
344 | if (!map) | ||
345 | continue; | ||
346 | |||
320 | sparse_init_one_section(__nr_to_section(pnum), pnum, map, | 347 | sparse_init_one_section(__nr_to_section(pnum), pnum, map, |
321 | usemap); | 348 | usemap); |
322 | } | 349 | } |
350 | |||
351 | vmemmap_populate_print_last(); | ||
352 | |||
353 | free_bootmem(__pa(usemap_map), size); | ||
323 | } | 354 | } |
324 | 355 | ||
325 | #ifdef CONFIG_MEMORY_HOTPLUG | 356 | #ifdef CONFIG_MEMORY_HOTPLUG |