aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/mm/init.c
diff options
context:
space:
mode:
authorRussell King <rmk@dyn-67.arm.linux.org.uk>2005-10-28 09:48:37 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2005-10-28 09:48:37 -0400
commit90072059d2963dec237ae0cf49831ef77ddb5739 (patch)
tree5ec0cc3e9759599957ea98eb9f5c372ffabca00f /arch/arm/mm/init.c
parentf339ab3d6c59f8f898c165384aa2b6a0ae5d4c1c (diff)
[ARM] Re-jig bootmem initialisation
Make ARM independent of the way bootmem operates internally. We now map each node as we initialise it, and place the bootmem bitmap inside each node, rather than all in the first node. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/mm/init.c')
-rw-r--r--arch/arm/mm/init.c480
1 files changed, 239 insertions, 241 deletions
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index edffa47a4b2a..d1f1ec73500f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/arch/arm/mm/init.c 2 * linux/arch/arm/mm/init.c
3 * 3 *
4 * Copyright (C) 1995-2002 Russell King 4 * Copyright (C) 1995-2005 Russell King
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -86,14 +86,19 @@ void show_mem(void)
86 printk("%d pages swap cached\n", cached); 86 printk("%d pages swap cached\n", cached);
87} 87}
88 88
89struct node_info { 89static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
90 unsigned int start; 90{
91 unsigned int end; 91 return pmd_offset(pgd, virt);
92 int bootmap_pages; 92}
93}; 93
94static inline pmd_t *pmd_off_k(unsigned long virt)
95{
96 return pmd_off(pgd_offset_k(virt), virt);
97}
94 98
95#define O_PFN_DOWN(x) ((x) >> PAGE_SHIFT) 99#define for_each_nodebank(iter,mi,no) \
96#define O_PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT) 100 for (iter = 0; iter < mi->nr_banks; iter++) \
101 if (mi->bank[iter].node == no)
97 102
98/* 103/*
99 * FIXME: We really want to avoid allocating the bootmap bitmap 104 * FIXME: We really want to avoid allocating the bootmap bitmap
@@ -106,15 +111,12 @@ find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
106{ 111{
107 unsigned int start_pfn, bank, bootmap_pfn; 112 unsigned int start_pfn, bank, bootmap_pfn;
108 113
109 start_pfn = O_PFN_UP(__pa(&_end)); 114 start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT;
110 bootmap_pfn = 0; 115 bootmap_pfn = 0;
111 116
112 for (bank = 0; bank < mi->nr_banks; bank ++) { 117 for_each_nodebank(bank, mi, node) {
113 unsigned int start, end; 118 unsigned int start, end;
114 119
115 if (mi->bank[bank].node != node)
116 continue;
117
118 start = mi->bank[bank].start >> PAGE_SHIFT; 120 start = mi->bank[bank].start >> PAGE_SHIFT;
119 end = (mi->bank[bank].size + 121 end = (mi->bank[bank].size +
120 mi->bank[bank].start) >> PAGE_SHIFT; 122 mi->bank[bank].start) >> PAGE_SHIFT;
@@ -140,92 +142,6 @@ find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
140 return bootmap_pfn; 142 return bootmap_pfn;
141} 143}
142 144
143/*
144 * Scan the memory info structure and pull out:
145 * - the end of memory
146 * - the number of nodes
147 * - the pfn range of each node
148 * - the number of bootmem bitmap pages
149 */
150static unsigned int __init
151find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
152{
153 unsigned int i, bootmem_pages = 0, memend_pfn = 0;
154
155 for (i = 0; i < MAX_NUMNODES; i++) {
156 np[i].start = -1U;
157 np[i].end = 0;
158 np[i].bootmap_pages = 0;
159 }
160
161 for (i = 0; i < mi->nr_banks; i++) {
162 unsigned long start, end;
163 int node;
164
165 if (mi->bank[i].size == 0) {
166 /*
167 * Mark this bank with an invalid node number
168 */
169 mi->bank[i].node = -1;
170 continue;
171 }
172
173 node = mi->bank[i].node;
174
175 /*
176 * Make sure we haven't exceeded the maximum number of nodes
177 * that we have in this configuration. If we have, we're in
178 * trouble. (maybe we ought to limit, instead of bugging?)
179 */
180 if (node >= MAX_NUMNODES)
181 BUG();
182 node_set_online(node);
183
184 /*
185 * Get the start and end pfns for this bank
186 */
187 start = mi->bank[i].start >> PAGE_SHIFT;
188 end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT;
189
190 if (np[node].start > start)
191 np[node].start = start;
192
193 if (np[node].end < end)
194 np[node].end = end;
195
196 if (memend_pfn < end)
197 memend_pfn = end;
198 }
199
200 /*
201 * Calculate the number of pages we require to
202 * store the bootmem bitmaps.
203 */
204 for_each_online_node(i) {
205 if (np[i].end == 0)
206 continue;
207
208 np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end -
209 np[i].start);
210 bootmem_pages += np[i].bootmap_pages;
211 }
212
213 high_memory = __va(memend_pfn << PAGE_SHIFT);
214
215 /*
216 * This doesn't seem to be used by the Linux memory
217 * manager any more. If we can get rid of it, we
218 * also get rid of some of the stuff above as well.
219 *
220 * Note: max_low_pfn and max_pfn reflect the number
221 * of _pages_ in the system, not the maximum PFN.
222 */
223 max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
224 max_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
225
226 return bootmem_pages;
227}
228
229static int __init check_initrd(struct meminfo *mi) 145static int __init check_initrd(struct meminfo *mi)
230{ 146{
231 int initrd_node = -2; 147 int initrd_node = -2;
@@ -266,9 +182,8 @@ static int __init check_initrd(struct meminfo *mi)
266/* 182/*
267 * Reserve the various regions of node 0 183 * Reserve the various regions of node 0
268 */ 184 */
269static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages) 185static __init void reserve_node_zero(pg_data_t *pgdat)
270{ 186{
271 pg_data_t *pgdat = NODE_DATA(0);
272 unsigned long res_size = 0; 187 unsigned long res_size = 0;
273 188
274 /* 189 /*
@@ -289,13 +204,6 @@ static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int boot
289 PTRS_PER_PGD * sizeof(pgd_t)); 204 PTRS_PER_PGD * sizeof(pgd_t));
290 205
291 /* 206 /*
292 * And don't forget to reserve the allocator bitmap,
293 * which will be freed later.
294 */
295 reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
296 bootmap_pages << PAGE_SHIFT);
297
298 /*
299 * Hmm... This should go elsewhere, but we really really need to 207 * Hmm... This should go elsewhere, but we really really need to
300 * stop things allocating the low memory; ideally we need a better 208 * stop things allocating the low memory; ideally we need a better
301 * implementation of GFP_DMA which does not assume that DMA-able 209 * implementation of GFP_DMA which does not assume that DMA-able
@@ -324,183 +232,276 @@ static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int boot
324 reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size); 232 reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
325} 233}
326 234
327/* 235void __init build_mem_type_table(void);
328 * Register all available RAM in this node with the bootmem allocator. 236void __init create_mapping(struct map_desc *md);
329 */ 237
330static inline void free_bootmem_node_bank(int node, struct meminfo *mi) 238static unsigned long __init
239bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
331{ 240{
332 pg_data_t *pgdat = NODE_DATA(node); 241 unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
333 int bank; 242 unsigned long start_pfn, end_pfn, boot_pfn;
243 unsigned int boot_pages;
244 pg_data_t *pgdat;
245 int i;
334 246
335 for (bank = 0; bank < mi->nr_banks; bank++) 247 start_pfn = -1UL;
336 if (mi->bank[bank].node == node) 248 end_pfn = 0;
337 free_bootmem_node(pgdat, mi->bank[bank].start,
338 mi->bank[bank].size);
339}
340 249
341/* 250 /*
342 * Initialise the bootmem allocator for all nodes. This is called 251 * Calculate the pfn range, and map the memory banks for this node.
343 * early during the architecture specific initialisation. 252 */
344 */ 253 for_each_nodebank(i, mi, node) {
345static void __init bootmem_init(struct meminfo *mi) 254 unsigned long start, end;
346{ 255 struct map_desc map;
347 struct node_info node_info[MAX_NUMNODES], *np = node_info;
348 unsigned int bootmap_pages, bootmap_pfn, map_pg;
349 int node, initrd_node;
350 256
351 bootmap_pages = find_memend_and_nodes(mi, np); 257 start = mi->bank[i].start >> PAGE_SHIFT;
352 bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages); 258 end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT;
353 initrd_node = check_initrd(mi);
354 259
355 map_pg = bootmap_pfn; 260 if (start_pfn > start)
261 start_pfn = start;
262 if (end_pfn < end)
263 end_pfn = end;
264
265 map.physical = mi->bank[i].start;
266 map.virtual = __phys_to_virt(map.physical);
267 map.length = mi->bank[i].size;
268 map.type = MT_MEMORY;
269
270 create_mapping(&map);
271 }
356 272
357 /* 273 /*
358 * Initialise the bootmem nodes. 274 * If there is no memory in this node, ignore it.
359 *
360 * What we really want to do is:
361 *
362 * unmap_all_regions_except_kernel();
363 * for_each_node_in_reverse_order(node) {
364 * map_node(node);
365 * allocate_bootmem_map(node);
366 * init_bootmem_node(node);
367 * free_bootmem_node(node);
368 * }
369 *
370 * but this is a 2.5-type change. For now, we just set
371 * the nodes up in reverse order.
372 *
373 * (we could also do with rolling bootmem_init and paging_init
374 * into one generic "memory_init" type function).
375 */ 275 */
376 np += num_online_nodes() - 1; 276 if (end_pfn == 0)
377 for (node = num_online_nodes() - 1; node >= 0; node--, np--) { 277 return end_pfn;
378 /*
379 * If there are no pages in this node, ignore it.
380 * Note that node 0 must always have some pages.
381 */
382 if (np->end == 0 || !node_online(node)) {
383 if (node == 0)
384 BUG();
385 continue;
386 }
387 278
388 /* 279 /*
389 * Initialise the bootmem allocator. 280 * Allocate the bootmem bitmap page.
390 */ 281 */
391 init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end); 282 boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
392 free_bootmem_node_bank(node, mi); 283 boot_pfn = find_bootmap_pfn(node, mi, boot_pages);
393 map_pg += np->bootmap_pages;
394 284
395 /* 285 /*
396 * If this is node 0, we need to reserve some areas ASAP - 286 * Initialise the bootmem allocator for this node, handing the
397 * we may use bootmem on node 0 to setup the other nodes. 287 * memory banks over to bootmem.
398 */ 288 */
399 if (node == 0) 289 node_set_online(node);
400 reserve_node_zero(bootmap_pfn, bootmap_pages); 290 pgdat = NODE_DATA(node);
401 } 291 init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
402 292
293 for_each_nodebank(i, mi, node)
294 free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size);
295
296 /*
297 * Reserve the bootmem bitmap for this node.
298 */
299 reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
300 boot_pages << PAGE_SHIFT);
403 301
404#ifdef CONFIG_BLK_DEV_INITRD 302#ifdef CONFIG_BLK_DEV_INITRD
405 if (phys_initrd_size && initrd_node >= 0) { 303 /*
406 reserve_bootmem_node(NODE_DATA(initrd_node), phys_initrd_start, 304 * If the initrd is in this node, reserve its memory.
305 */
306 if (node == initrd_node) {
307 reserve_bootmem_node(pgdat, phys_initrd_start,
407 phys_initrd_size); 308 phys_initrd_size);
408 initrd_start = __phys_to_virt(phys_initrd_start); 309 initrd_start = __phys_to_virt(phys_initrd_start);
409 initrd_end = initrd_start + phys_initrd_size; 310 initrd_end = initrd_start + phys_initrd_size;
410 } 311 }
411#endif 312#endif
412 313
413 BUG_ON(map_pg != bootmap_pfn + bootmap_pages); 314 /*
315 * Finally, reserve any node zero regions.
316 */
317 if (node == 0)
318 reserve_node_zero(pgdat);
319
320 /*
321 * initialise the zones within this node.
322 */
323 memset(zone_size, 0, sizeof(zone_size));
324 memset(zhole_size, 0, sizeof(zhole_size));
325
326 /*
327 * The size of this node has already been determined. If we need
328 * to do anything fancy with the allocation of this memory to the
329 * zones, now is the time to do it.
330 */
331 zone_size[0] = end_pfn - start_pfn;
332
333 /*
334 * For each bank in this node, calculate the size of the holes.
335 * holes = node_size - sum(bank_sizes_in_node)
336 */
337 zhole_size[0] = zone_size[0];
338 for_each_nodebank(i, mi, node)
339 zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
340
341 /*
342 * Adjust the sizes according to any special requirements for
343 * this machine type.
344 */
345 arch_adjust_zones(node, zone_size, zhole_size);
346
347 free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
348
349 return end_pfn;
414} 350}
415 351
416/* 352static void __init bootmem_init(struct meminfo *mi)
417 * paging_init() sets up the page tables, initialises the zone memory
418 * maps, and sets up the zero page, bad page and bad page tables.
419 */
420void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
421{ 353{
422 void *zero_page; 354 unsigned long addr, memend_pfn = 0;
423 int node; 355 int node, initrd_node, i;
424 356
425 bootmem_init(mi); 357 /*
358 * Invalidate the node number for empty or invalid memory banks
359 */
360 for (i = 0; i < mi->nr_banks; i++)
361 if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES)
362 mi->bank[i].node = -1;
426 363
427 memcpy(&meminfo, mi, sizeof(meminfo)); 364 memcpy(&meminfo, mi, sizeof(meminfo));
428 365
366#ifdef CONFIG_XIP_KERNEL
367#error needs fixing
368 p->physical = CONFIG_XIP_PHYS_ADDR & PMD_MASK;
369 p->virtual = (unsigned long)&_stext & PMD_MASK;
370 p->length = ((unsigned long)&_etext - p->virtual + ~PMD_MASK) & PMD_MASK;
371 p->type = MT_ROM;
372 p ++;
373#endif
374
429 /* 375 /*
430 * allocate the zero page. Note that we count on this going ok. 376 * Clear out all the mappings below the kernel image.
377 * FIXME: what about XIP?
431 */ 378 */
432 zero_page = alloc_bootmem_low_pages(PAGE_SIZE); 379 for (addr = 0; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
380 pmd_clear(pmd_off_k(addr));
433 381
434 /* 382 /*
435 * initialise the page tables. 383 * Clear out all the kernel space mappings, except for the first
384 * memory bank, up to the end of the vmalloc region.
436 */ 385 */
437 memtable_init(mi); 386 for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
438 if (mdesc->map_io) 387 addr < VMALLOC_END; addr += PGDIR_SIZE)
439 mdesc->map_io(); 388 pmd_clear(pmd_off_k(addr));
440 local_flush_tlb_all();
441 389
442 /* 390 /*
443 * initialise the zones within each node 391 * Locate which node contains the ramdisk image, if any.
444 */ 392 */
445 for_each_online_node(node) { 393 initrd_node = check_initrd(mi);
446 unsigned long zone_size[MAX_NR_ZONES];
447 unsigned long zhole_size[MAX_NR_ZONES];
448 struct bootmem_data *bdata;
449 pg_data_t *pgdat;
450 int i;
451 394
452 /* 395 /*
453 * Initialise the zone size information. 396 * Run through each node initialising the bootmem allocator.
454 */ 397 */
455 for (i = 0; i < MAX_NR_ZONES; i++) { 398 for_each_node(node) {
456 zone_size[i] = 0; 399 unsigned long end_pfn;
457 zhole_size[i] = 0;
458 }
459 400
460 pgdat = NODE_DATA(node); 401 end_pfn = bootmem_init_node(node, initrd_node, mi);
461 bdata = pgdat->bdata;
462 402
463 /* 403 /*
464 * The size of this node has already been determined. 404 * Remember the highest memory PFN.
465 * If we need to do anything fancy with the allocation
466 * of this memory to the zones, now is the time to do
467 * it.
468 */ 405 */
469 zone_size[0] = bdata->node_low_pfn - 406 if (end_pfn > memend_pfn)
470 (bdata->node_boot_start >> PAGE_SHIFT); 407 memend_pfn = end_pfn;
408 }
471 409
472 /* 410 high_memory = __va(memend_pfn << PAGE_SHIFT);
473 * If this zone has zero size, skip it.
474 */
475 if (!zone_size[0])
476 continue;
477 411
478 /* 412 /*
479 * For each bank in this node, calculate the size of the 413 * This doesn't seem to be used by the Linux memory manager any
480 * holes. holes = node_size - sum(bank_sizes_in_node) 414 * more, but is used by ll_rw_block. If we can get rid of it, we
481 */ 415 * also get rid of some of the stuff above as well.
482 zhole_size[0] = zone_size[0]; 416 *
483 for (i = 0; i < mi->nr_banks; i++) { 417 * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
484 if (mi->bank[i].node != node) 418 * the system, not the maximum PFN.
485 continue; 419 */
420 max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET;
421}
486 422
487 zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT; 423/*
488 } 424 * Set up device the mappings. Since we clear out the page tables for all
425 * mappings above VMALLOC_END, we will remove any debug device mappings.
426 * This means you have to be careful how you debug this function, or any
427 * called function. (Do it by code inspection!)
428 */
429static void __init devicemaps_init(struct machine_desc *mdesc)
430{
431 struct map_desc map;
432 unsigned long addr;
433 void *vectors;
489 434
490 /* 435 for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
491 * Adjust the sizes according to any special 436 pmd_clear(pmd_off_k(addr));
492 * requirements for this machine type.
493 */
494 arch_adjust_zones(node, zone_size, zhole_size);
495 437
496 free_area_init_node(node, pgdat, zone_size, 438 /*
497 bdata->node_boot_start >> PAGE_SHIFT, zhole_size); 439 * Map the cache flushing regions.
440 */
441#ifdef FLUSH_BASE
442 map.physical = FLUSH_BASE_PHYS;
443 map.virtual = FLUSH_BASE;
444 map.length = PGDIR_SIZE;
445 map.type = MT_CACHECLEAN;
446 create_mapping(&map);
447#endif
448#ifdef FLUSH_BASE_MINICACHE
449 map.physical = FLUSH_BASE_PHYS + PGDIR_SIZE;
450 map.virtual = FLUSH_BASE_MINICACHE;
451 map.length = PGDIR_SIZE;
452 map.type = MT_MINICLEAN;
453 create_mapping(&map);
454#endif
455
456 flush_cache_all();
457 local_flush_tlb_all();
458
459 vectors = alloc_bootmem_low_pages(PAGE_SIZE);
460 BUG_ON(!vectors);
461
462 /*
463 * Create a mapping for the machine vectors at the high-vectors
464 * location (0xffff0000). If we aren't using high-vectors, also
465 * create a mapping at the low-vectors virtual address.
466 */
467 map.physical = virt_to_phys(vectors);
468 map.virtual = 0xffff0000;
469 map.length = PAGE_SIZE;
470 map.type = MT_HIGH_VECTORS;
471 create_mapping(&map);
472
473 if (!vectors_high()) {
474 map.virtual = 0;
475 map.type = MT_LOW_VECTORS;
476 create_mapping(&map);
498 } 477 }
499 478
500 /* 479 /*
501 * finish off the bad pages once 480 * Ask the machine support to map in the statically mapped devices.
502 * the mem_map is initialised 481 * After this point, we can start to touch devices again.
482 */
483 if (mdesc->map_io)
484 mdesc->map_io();
485}
486
487/*
488 * paging_init() sets up the page tables, initialises the zone memory
489 * maps, and sets up the zero page, bad page and bad page tables.
490 */
491void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
492{
493 void *zero_page;
494
495 build_mem_type_table();
496 bootmem_init(mi);
497 devicemaps_init(mdesc);
498
499 top_pmd = pmd_off_k(0xffff0000);
500
501 /*
502 * allocate the zero page. Note that we count on this going ok.
503 */ 503 */
504 zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
504 memzero(zero_page, PAGE_SIZE); 505 memzero(zero_page, PAGE_SIZE);
505 empty_zero_page = virt_to_page(zero_page); 506 empty_zero_page = virt_to_page(zero_page);
506 flush_dcache_page(empty_zero_page); 507 flush_dcache_page(empty_zero_page);
@@ -562,10 +563,7 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi)
562 * may not be the case, especially if the user has provided the 563 * may not be the case, especially if the user has provided the
563 * information on the command line. 564 * information on the command line.
564 */ 565 */
565 for (i = 0; i < mi->nr_banks; i++) { 566 for_each_nodebank(i, mi, node) {
566 if (mi->bank[i].size == 0 || mi->bank[i].node != node)
567 continue;
568
569 bank_start = mi->bank[i].start >> PAGE_SHIFT; 567 bank_start = mi->bank[i].start >> PAGE_SHIFT;
570 if (bank_start < prev_bank_end) { 568 if (bank_start < prev_bank_end) {
571 printk(KERN_ERR "MEM: unordered memory banks. " 569 printk(KERN_ERR "MEM: unordered memory banks. "