author    Linus Torvalds <torvalds@linux-foundation.org>  2008-04-26 17:04:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-04-26 17:04:32 -0400
commit    c3bf9bc243092c53946fd6d8ebd6dc2f4e572d48 (patch)
tree      4cabbf33e11e3a71b64394b24fe70453f41cefe8
parent    e3505dd50caf54e6f81f897cb347441409974a15 (diff)
parent    c2b91e2eec9678dbda274e906cc32ea8f711da3b (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3:
  x86_64/mm: check and print vmemmap allocation continuous
  x86_64: fix setup_node_bootmem to support big mem excluding with memmap
  x86_64: make reserve_bootmem_generic() use new reserve_bootmem()
  mm: allow reserve_bootmem() cross nodes
  mm: offset align in alloc_bootmem()
  mm: fix alloc_bootmem_core to use fast searching for all nodes
  mm: make mem_map allocation continuous
-rw-r--r--  arch/x86/kernel/e820_64.c  |  13
-rw-r--r--  arch/x86/kernel/setup_64.c |   3
-rw-r--r--  arch/x86/mm/init_64.c      |  38
-rw-r--r--  arch/x86/mm/numa_64.c      |  42
-rw-r--r--  include/asm-x86/e820_64.h  |   2
-rw-r--r--  include/linux/mm.h         |   1
-rw-r--r--  mm/bootmem.c               | 164
-rw-r--r--  mm/sparse.c                |  37
8 files changed, 228 insertions, 72 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 79f0d52fa99a..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end)
         early_res[j - 1].end = 0;
 }
 
-void __init early_res_to_bootmem(void)
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
 {
         int i;
+        unsigned long final_start, final_end;
         for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
                 struct early_res *r = &early_res[i];
-                printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
-                        r->start, r->end - 1, r->name);
-                reserve_bootmem_generic(r->start, r->end - r->start);
+                final_start = max(start, r->start);
+                final_end = min(end, r->end);
+                if (final_start >= final_end)
+                        continue;
+                printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
+                        final_start, final_end - 1, r->name);
+                reserve_bootmem_generic(final_start, final_end - final_start);
         }
 }
 
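Aside: the reworked early_res_to_bootmem() clamps every early reservation to the [start, end) window it is given and skips ranges that fall entirely outside it. A minimal userspace sketch of that clamping (hypothetical helper name and values, not part of the patch):

#include <stdio.h>

/* clamp [r_start, r_end) to the window [start, end); return 0 if empty */
static int clamp_range(unsigned long r_start, unsigned long r_end,
                       unsigned long start, unsigned long end,
                       unsigned long *out_start, unsigned long *out_end)
{
        unsigned long final_start = r_start > start ? r_start : start;
        unsigned long final_end = r_end < end ? r_end : end;

        if (final_start >= final_end)
                return 0;       /* no overlap with the window */
        *out_start = final_start;
        *out_end = final_end;
        return 1;
}

int main(void)
{
        unsigned long s, e;

        /* a reservation [0x1000, 0x9000) clamped to node range [0x4000, 0x8000) */
        if (clamp_range(0x1000, 0x9000, 0x4000, 0x8000, &s, &e))
                printf("reserve [%lx-%lx]\n", s, e - 1);        /* [4000-7fff] */
        return 0;
}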
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index b04e2c011e1a..60e64c8eee92 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
         bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
         e820_register_active_regions(0, start_pfn, end_pfn);
         free_bootmem_with_active_regions(0, end_pfn);
+        early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
         reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p)
         contig_initmem_init(0, end_pfn);
 #endif
 
-        early_res_to_bootmem();
-
         dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0cca62663037..5fbb8652cf59 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -810,7 +810,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
 #ifdef CONFIG_NUMA
-        int nid = phys_to_nid(phys);
+        int nid, next_nid;
 #endif
         unsigned long pfn = phys >> PAGE_SHIFT;
 
@@ -829,10 +829,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
         /* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-        reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(phys);
+        next_nid = phys_to_nid(phys + len - 1);
+        if (nid == next_nid)
+                reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        else
+                reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #else
         reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
+
         if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                 dma_reserve += len / PAGE_SIZE;
                 set_dma_reserve(dma_reserve);
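Aside: reserve_bootmem_generic() now looks up the node of both ends of the range; only when [phys, phys+len) starts and ends on the same node can it take the per-node reserve_bootmem_node() fast path, otherwise it falls back to the cross-node reserve_bootmem(). A sketch of that endpoint check, with phys_to_nid() stubbed as one node per 1G purely for illustration:

#include <stdio.h>

/* stub: 1G per node, for illustration only */
static int phys_to_nid(unsigned long phys)
{
        return (int)(phys >> 30);
}

/* does [phys, phys+len) stay within a single node? */
static int range_on_one_node(unsigned long phys, unsigned long len)
{
        return phys_to_nid(phys) == phys_to_nid(phys + len - 1);
}

int main(void)
{
        /* 4K near the top of node 0: both endpoint lookups agree */
        printf("%d\n", range_on_one_node(0x3fffe000UL, 0x1000));        /* 1 */
        /* 16K straddling the node 0/1 boundary at 1G */
        printf("%d\n", range_on_one_node(0x3fffe000UL, 0x4000));        /* 0 */
        return 0;
}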
@@ -926,6 +932,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
 int __meminit
 vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
@@ -960,12 +970,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                                                 PAGE_KERNEL_LARGE);
                         set_pmd(pmd, __pmd(pte_val(entry)));
 
-                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
-                                addr, addr + PMD_SIZE - 1, p, node);
+                        /* check to see if we have contiguous blocks */
+                        if (p_end != p || node_start != node) {
+                                if (p_start)
+                                        printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                                                addr_start, addr_end-1, p_start, p_end-1, node_start);
+                                addr_start = addr;
+                                node_start = node;
+                                p_start = p;
+                        }
+                        addr_end = addr + PMD_SIZE;
+                        p_end = p + PMD_SIZE;
                 } else {
                         vmemmap_verify((pte_t *)pmd, node, addr, next);
                 }
         }
         return 0;
 }
+
+void __meminit vmemmap_populate_print_last(void)
+{
+        if (p_start) {
+                printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                        addr_start, addr_end-1, p_start, p_end-1, node_start);
+                p_start = NULL;
+                p_end = NULL;
+                node_start = 0;
+        }
+}
 #endif
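Aside: instead of one KERN_DEBUG line per 2M page, vmemmap_populate() now keeps a pending [addr_start, addr_end) run and prints only when address or node continuity breaks; vmemmap_populate_print_last() flushes the final run. A standalone sketch of the same coalescing idea, with hypothetical names in userspace C:

#include <stdio.h>

#define PMD_SIZE (2UL << 20)    /* 2M, as on x86_64 */

static unsigned long addr_start, addr_end;
static int node_start = -1;

/* flush the pending run, mirroring vmemmap_populate_print_last() */
static void flush_run(void)
{
        if (addr_end) {
                printf(" [%lx-%lx] PMD on node %d\n",
                       addr_start, addr_end - 1, node_start);
                addr_end = 0;
        }
}

/* record one 2M mapping; print only when continuity breaks */
static void record(unsigned long addr, int node)
{
        if (addr != addr_end || node != node_start) {
                flush_run();
                addr_start = addr;
                node_start = node;
        }
        addr_end = addr + PMD_SIZE;
}

int main(void)
{
        record(0x0, 0);
        record(0x200000, 0);    /* contiguous: coalesced into one run */
        record(0x600000, 0);    /* gap: previous run gets printed */
        flush_run();            /* print the last run */
        return 0;
}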
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         unsigned long bootmap_start, nodedata_phys;
         void *bootmap;
         const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+        int nid;
 
         start = round_up(start, ZONE_ALIGN);
 
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         NODE_DATA(nodeid)->node_start_pfn = start_pfn;
         NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
-        /* Find a place for the bootmem map */
+        /*
+         * Find a place for the bootmem map.
+         * nodedata_phys could be on another node via alloc_bootmem,
+         * so make sure bootmap_start is not too small; otherwise
+         * early_node_mem will grab it with find_e820_area instead
+         * of alloc_bootmem, which could clash with a reserved range.
+         */
         bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-        bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        nid = phys_to_nid(nodedata_phys);
+        if (nid == nodeid)
+                bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        else
+                bootmap_start = round_up(start, PAGE_SIZE);
         /*
          * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
          * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
         free_bootmem_with_active_regions(nodeid, end);
 
-        reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
-                        BOOTMEM_DEFAULT);
-        reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-                        bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+        /*
+         * Convert the early reservations to bootmem reservations first;
+         * otherwise early_node_mem could use early-reserved memory
+         * on a previous node.
+         */
+        early_res_to_bootmem(start, end);
+
+        /*
+         * In some cases early_node_mem could use alloc_bootmem
+         * to get a range on another node; don't reserve that again.
+         */
+        if (nid != nodeid)
+                printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+                                        pgdat_size, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(bootmap_start);
+        if (nid != nodeid)
+                printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+                                        bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
 #ifdef CONFIG_ACPI_NUMA
         srat_reserve_add_area(nodeid);
 #endif
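Aside: on a big machine the pg_data_t and the bootmap can land on a different node than the one being set up (alloc_bootmem may have placed them elsewhere), in which case they must not be reserved through this node's own bootmem map. A sketch of that decision, with phys_to_nid() stubbed and the actual reservation replaced by a printf:

#include <stdio.h>

/* stub: pretend everything below 1G is node 0, the rest node 1 */
static int phys_to_nid(unsigned long phys)
{
        return phys < (1UL << 30) ? 0 : 1;
}

/* reserve via the per-node path only if the range lives on this node;
 * otherwise leave it to the global (cross-node) reserve_bootmem() path */
static void reserve_nodedata(int nodeid, unsigned long nodedata_phys,
                             unsigned long pgdat_size)
{
        int nid = phys_to_nid(nodedata_phys);

        if (nid != nodeid)
                printf(" NODE_DATA(%d) on node %d\n", nodeid, nid);
        else
                printf("reserve_bootmem_node(node %d, %lx, %lx)\n",
                       nodeid, nodedata_phys, pgdat_size);
}

int main(void)
{
        reserve_nodedata(1, 0x40000000UL + 0x1000, 0x4000); /* on node 1 */
        reserve_nodedata(1, 0x1000, 0x4000);                /* fell on node 0 */
        return 0;
}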
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index b5e02e379af3..71c4d685d30d 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -49,7 +49,7 @@ extern void update_e820(void);
 
 extern void reserve_early(unsigned long start, unsigned long end, char *name);
 extern void free_early(unsigned long start, unsigned long end);
-extern void early_res_to_bootmem(void);
+extern void early_res_to_bootmem(unsigned long start, unsigned long end);
 
 #endif/*!__ASSEMBLY__*/
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b695875d63e3..286d31521605 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1229,6 +1229,7 @@ void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(struct page *start_page,
                                                 unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
+void vmemmap_populate_print_last(void);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 2ccea700968f..b6791646143e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
         unsigned long addr, unsigned long size, int flags)
 {
         unsigned long sidx, eidx;
         unsigned long i;
-        int ret;
+
+        BUG_ON(!size);
+
+        /* out of range, don't hold other */
+        if (addr + size < bdata->node_boot_start ||
+                PFN_DOWN(addr) > bdata->node_low_pfn)
+                return 0;
 
         /*
-         * round up, partially reserved pages are considered
-         * fully reserved.
+         * Round up the index to the range.
          */
+        if (addr > bdata->node_boot_start)
+                sidx = PFN_DOWN(addr - bdata->node_boot_start);
+        else
+                sidx = 0;
+
+        eidx = PFN_UP(addr + size - bdata->node_boot_start);
+        if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+                eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+        for (i = sidx; i < eidx; i++) {
+                if (test_bit(i, bdata->node_bootmem_map)) {
+                        if (flags & BOOTMEM_EXCLUSIVE)
+                                return -EBUSY;
+                }
+        }
+
+        return 0;
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+        unsigned long addr, unsigned long size, int flags)
+{
+        unsigned long sidx, eidx;
+        unsigned long i;
+
         BUG_ON(!size);
-        BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
-        BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
-        BUG_ON(addr < bdata->node_boot_start);
 
-        sidx = PFN_DOWN(addr - bdata->node_boot_start);
+        /* out of range */
+        if (addr + size < bdata->node_boot_start ||
+                PFN_DOWN(addr) > bdata->node_low_pfn)
+                return;
+
+        /*
+         * Round up the index to the range.
+         */
+        if (addr > bdata->node_boot_start)
+                sidx = PFN_DOWN(addr - bdata->node_boot_start);
+        else
+                sidx = 0;
+
         eidx = PFN_UP(addr + size - bdata->node_boot_start);
+        if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+                eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-        for (i = sidx; i < eidx; i++)
-                if (test_and_set_bit(i, bdata->node_bootmem_map)) {
+        for (i = sidx; i < eidx; i++) {
+                if (test_and_set_bit(i, bdata->node_bootmem_map)) {
 #ifdef CONFIG_DEBUG_BOOTMEM
                         printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 #endif
-                        if (flags & BOOTMEM_EXCLUSIVE) {
-                                ret = -EBUSY;
-                                goto err;
-                        }
                 }
-
-        return 0;
-
-err:
-        /* unreserve memory we accidentally reserved */
-        for (i--; i >= sidx; i--)
-                clear_bit(i, bdata->node_bootmem_map);
-
-        return ret;
+        }
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -206,9 +236,11 @@ void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         unsigned long align, unsigned long goal, unsigned long limit)
 {
-        unsigned long offset, remaining_size, areasize, preferred;
+        unsigned long areasize, preferred;
         unsigned long i, start = 0, incr, eidx, end_pfn;
         void *ret;
+        unsigned long node_boot_start;
+        void *node_bootmem_map;
 
         if (!size) {
                 printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         }
         BUG_ON(align & (align-1));
 
-        if (limit && bdata->node_boot_start >= limit)
-                return NULL;
-
         /* on nodes without memory - bootmem_map is NULL */
         if (!bdata->node_bootmem_map)
                 return NULL;
 
+        /* bdata->node_boot_start is supposed to be (12+6) bits aligned on x86_64? */
+        node_boot_start = bdata->node_boot_start;
+        node_bootmem_map = bdata->node_bootmem_map;
+        if (align) {
+                node_boot_start = ALIGN(bdata->node_boot_start, align);
+                if (node_boot_start > bdata->node_boot_start)
+                        node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+                                PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+        }
+
+        if (limit && node_boot_start >= limit)
+                return NULL;
+
         end_pfn = bdata->node_low_pfn;
         limit = PFN_DOWN(limit);
         if (limit && end_pfn > limit)
                 end_pfn = limit;
 
-        eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
-        offset = 0;
-        if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
-                offset = align - (bdata->node_boot_start & (align - 1UL));
-        offset = PFN_DOWN(offset);
+        eidx = end_pfn - PFN_DOWN(node_boot_start);
 
         /*
          * We try to allocate bootmem pages above 'goal'
          * first, then we try to allocate lower pages.
          */
-        if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
-                preferred = goal - bdata->node_boot_start;
+        preferred = 0;
+        if (goal && PFN_DOWN(goal) < end_pfn) {
+                if (goal > node_boot_start)
+                        preferred = goal - node_boot_start;
 
-                if (bdata->last_success >= preferred)
+                if (bdata->last_success > node_boot_start &&
+                        bdata->last_success - node_boot_start >= preferred)
                         if (!limit || (limit && limit > bdata->last_success))
-                                preferred = bdata->last_success;
-        } else
-                preferred = 0;
+                                preferred = bdata->last_success - node_boot_start;
+        }
 
-        preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+        preferred = PFN_DOWN(ALIGN(preferred, align));
         areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
         incr = align >> PAGE_SHIFT ? : 1;
 
 restart_scan:
-        for (i = preferred; i < eidx; i += incr) {
+        for (i = preferred; i < eidx;) {
                 unsigned long j;
-                i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+
+                i = find_next_zero_bit(node_bootmem_map, eidx, i);
                 i = ALIGN(i, incr);
                 if (i >= eidx)
                         break;
-                if (test_bit(i, bdata->node_bootmem_map))
+                if (test_bit(i, node_bootmem_map)) {
+                        i += incr;
                         continue;
+                }
                 for (j = i + 1; j < i + areasize; ++j) {
                         if (j >= eidx)
                                 goto fail_block;
-                        if (test_bit(j, bdata->node_bootmem_map))
+                        if (test_bit(j, node_bootmem_map))
                                 goto fail_block;
                 }
                 start = i;
                 goto found;
         fail_block:
                 i = ALIGN(j, incr);
+                if (i == j)
+                        i += incr;
         }
 
-        if (preferred > offset) {
-                preferred = offset;
+        if (preferred > 0) {
+                preferred = 0;
                 goto restart_scan;
         }
         return NULL;
 
 found:
-        bdata->last_success = PFN_PHYS(start);
+        bdata->last_success = PFN_PHYS(start) + node_boot_start;
         BUG_ON(start >= eidx);
 
         /*
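Aside: the offset-align change replaces the old per-allocation offset arithmetic: node_boot_start is rounded up to the requested alignment once, and the bitmap pointer is advanced by whole words, so bit i again means "page i above node_boot_start". A sketch of that base adjustment (hypothetical constants, userspace C; node_boot_start is assumed (12+6)-bit aligned so the word division is exact):

#include <stdio.h>

#define PAGE_SHIFT      12
#define BITS_PER_LONG   (8 * sizeof(unsigned long))
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)

int main(void)
{
        unsigned long boot_start = 0x40000;     /* node starts at 256K, (12+6)-bit aligned */
        unsigned long align = 1UL << 21;        /* caller wants 2M alignment */
        static unsigned long map[1024];         /* the node's bootmem bitmap */
        unsigned long *base = map;

        unsigned long aligned = ALIGN_UP(boot_start, align);
        if (aligned > boot_start)
                /* skip the pages below the aligned base, in whole words */
                base = map + PFN_DOWN(aligned - boot_start) / BITS_PER_LONG;

        printf("aligned base %lx, map advanced by %ld words\n",
               aligned, (long)(base - map));    /* 200000, 7 words */
        return 0;
}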
@@ -289,6 +334,7 @@ found:
          */
         if (align < PAGE_SIZE &&
             bdata->last_offset && bdata->last_pos+1 == start) {
+                unsigned long offset, remaining_size;
                 offset = ALIGN(bdata->last_offset, align);
                 BUG_ON(offset > PAGE_SIZE);
                 remaining_size = PAGE_SIZE - offset;
@@ -297,14 +343,12 @@ found:
                         /* last_pos unchanged */
                         bdata->last_offset = offset + size;
                         ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                           offset +
-                                           bdata->node_boot_start);
+                                           offset + node_boot_start);
                 } else {
                         remaining_size = size - remaining_size;
                         areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
                         ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                           offset +
-                                           bdata->node_boot_start);
+                                           offset + node_boot_start);
                         bdata->last_pos = start + areasize - 1;
                         bdata->last_offset = remaining_size;
                 }
@@ -312,14 +356,14 @@ found:
         } else {
                 bdata->last_pos = start + areasize - 1;
                 bdata->last_offset = size & ~PAGE_MASK;
-                ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+                ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
         }
 
         /*
          * Reserve the area now:
          */
         for (i = start; i < start + areasize; i++)
-                if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+                if (unlikely(test_and_set_bit(i, node_bootmem_map)))
                         BUG();
         memset(ret, 0, size);
         return ret;
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                                  unsigned long size, int flags)
 {
+        int ret;
+
+        ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+        if (ret < 0)
+                return;
         reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 }
 
@@ -426,7 +475,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
                             int flags)
 {
-        return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+        bootmem_data_t *bdata;
+        int ret;
+
+        list_for_each_entry(bdata, &bdata_list, list) {
+                ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+                if (ret < 0)
+                        return ret;
+        }
+        list_for_each_entry(bdata, &bdata_list, list)
+                reserve_bootmem_core(bdata, addr, size, flags);
+
+        return 0;
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
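Aside: reserve_bootmem() now makes two passes over bdata_list: a read-only can_reserve_bootmem_core() pass that may fail with -EBUSY, then a reserve_bootmem_core() pass that cannot fail, so a cross-node reservation is applied everywhere or not at all and the old goto err rollback becomes unnecessary. A compact userspace sketch of that check-then-commit pattern (hypothetical types; the kernel version additionally clamps the range to each node's span):

#include <stdio.h>

#define NNODES 2
#define NBITS  16

static unsigned char maps[NNODES][NBITS];       /* one "bootmem map" per node */

/* phase 1: read-only check, may fail without side effects */
static int can_reserve(int exclusive, unsigned s, unsigned e)
{
        for (int n = 0; n < NNODES; n++)
                for (unsigned i = s; i < e; i++)
                        if (maps[n][i] && exclusive)
                                return -1;      /* -EBUSY in the kernel */
        return 0;
}

/* phase 2: commit, cannot fail, so no rollback is ever needed */
static void do_reserve(unsigned s, unsigned e)
{
        for (int n = 0; n < NNODES; n++)
                for (unsigned i = s; i < e; i++)
                        maps[n][i] = 1;
}

int main(void)
{
        if (can_reserve(1, 4, 8) < 0)
                return 1;
        do_reserve(4, 8);
        printf("reserved bits 4-7 on all nodes\n");
        return 0;
}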
diff --git a/mm/sparse.c b/mm/sparse.c
index 98d6b39c3472..7e9191381f86 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -295,6 +295,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
         return NULL;
 }
 
+void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
+{
+}
 /*
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
@@ -304,22 +307,50 @@ void __init sparse_init(void)
         unsigned long pnum;
         struct page *map;
         unsigned long *usemap;
+        unsigned long **usemap_map;
+        int size;
+
+        /*
+         * map uses big pages (aka 2M on 64-bit x86) while
+         * usemap is less than one page (aka 24 bytes),
+         * so allocating 2M (with 2M alignment) and 24 bytes in turn
+         * makes the next 2M slip to one more 2M later,
+         * and on a big system the memory ends up with a lot of holes...
+         * Here we try to allocate the 2M pages continuously.
+         *
+         * powerpc needs to call sparse_init_one_section right after each
+         * sparse_early_mem_map_alloc, so allocate usemap_map first.
+         */
+        size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
+        usemap_map = alloc_bootmem(size);
+        if (!usemap_map)
+                panic("can not allocate usemap_map\n");
 
         for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                 if (!present_section_nr(pnum))
                         continue;
+                usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
+        }
 
-                map = sparse_early_mem_map_alloc(pnum);
-                if (!map)
-                        continue;
+        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+                if (!present_section_nr(pnum))
+                        continue;
 
-                usemap = sparse_early_usemap_alloc(pnum);
+                usemap = usemap_map[pnum];
                 if (!usemap)
                         continue;
 
+                map = sparse_early_mem_map_alloc(pnum);
+                if (!map)
+                        continue;
+
                 sparse_init_one_section(__nr_to_section(pnum), pnum, map,
                                         usemap);
         }
+
+        vmemmap_populate_print_last();
+
+        free_bootmem(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
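Aside: sparse_init() now allocates all of the small usemaps in a first pass and the 2M-aligned mem_map sections in a second, so the big allocations stay back-to-back instead of alternating with 24-byte ones. A toy bump allocator showing why the interleaved order leaves holes of almost 2M between sections (hypothetical, userspace C):

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define MAP_SZ  (2UL << 20)     /* one section's mem_map: 2M, 2M-aligned */
#define USE_SZ  24UL            /* one section's usemap */

static unsigned long cursor;

static unsigned long bump(unsigned long size, unsigned long align)
{
        cursor = ALIGN_UP(cursor, align);
        unsigned long addr = cursor;
        cursor += size;
        return addr;
}

int main(void)
{
        int nsec = 4;

        /* interleaved: every usemap pushes the next 2M map to a new 2M slot */
        cursor = 0;
        for (int i = 0; i < nsec; i++) {
                bump(MAP_SZ, MAP_SZ);
                bump(USE_SZ, 8);
        }
        printf("interleaved: %lu bytes\n", cursor);     /* ~14M for 4 sections */

        /* two passes: usemaps first, then the 2M maps pack contiguously */
        cursor = 0;
        for (int i = 0; i < nsec; i++)
                bump(USE_SZ, 8);
        for (int i = 0; i < nsec; i++)
                bump(MAP_SZ, MAP_SZ);
        printf("two passes: %lu bytes\n", cursor);      /* ~10M for 4 sections */
        return 0;
}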