aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/init_64.c
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2008-06-25 00:19:19 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-08 07:11:07 -0400
commit4f9c11dd49fb73e1ec088b27ed6539681a445988 (patch)
tree3948748b72e91df6cb6bbf9656b62b5e3b416d1b /arch/x86/mm/init_64.c
parentf97013fd8f17120182aa247f360e4d2069a9db9c (diff)
x86, 64-bit: adjust mapping of physical pagetables to work with Xen
This makes a few changes to the construction of the initial pagetables to work better with paravirt_ops/Xen. The main areas are: 1. Support non-PSE mapping of memory, since Xen doesn't currently allow 2M pages to be mapped in guests. 2. Make sure that the ioremap aliases of all pages are dropped before attaching the new page to the pagetable. This avoids having writable aliases of pagetable pages. 3. Preserve existing pagetable entries, rather than overwriting. It's possible that a fair amount of pagetable has already been constructed, so reuse what's already in place rather than ignoring and overwriting it. The algorithm relies on the invariant that any page which is part of the kernel pagetable is itself mapped in the linear memory area. This way, it can avoid using ioremap on a pagetable page. The invariant holds because it maps memory from low to high addresses, and also allocates memory from low to high. Each allocated page can map at least 2M of address space, so the mapped area will always progress much faster than the allocated area. It relies on the early boot code mapping enough pages to get started. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: xen-devel <xen-devel@lists.xensource.com> Cc: Stephen Tweedie <sct@redhat.com> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Mark McLoughlin <markmc@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm/init_64.c')
-rw-r--r--arch/x86/mm/init_64.c94
1 file changed, 82 insertions, 12 deletions
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d5d4b04d48a..363751dc3fb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -253,6 +253,43 @@ static __meminit void unmap_low_page(void *adr)
253 early_iounmap(adr, PAGE_SIZE); 253 early_iounmap(adr, PAGE_SIZE);
254} 254}
255 255
256static void __meminit
257phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
258{
259 unsigned pages = 0;
260 int i;
261 pte_t *pte = pte_page + pte_index(addr);
262
263 for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
264
265 if (addr >= end) {
266 if (!after_bootmem) {
267 for(; i < PTRS_PER_PTE; i++, pte++)
268 set_pte(pte, __pte(0));
269 }
270 break;
271 }
272
273 if (pte_val(*pte))
274 continue;
275
276 if (0)
277 printk(" pte=%p addr=%lx pte=%016lx\n",
278 pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
279 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
280 pages++;
281 }
282 update_page_count(PG_LEVEL_4K, pages);
283}
284
285static void __meminit
286phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
287{
288 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
289
290 phys_pte_init(pte, address, end);
291}
292
256static unsigned long __meminit 293static unsigned long __meminit
257phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) 294phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
258{ 295{
@@ -261,7 +298,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
261 int i = pmd_index(address); 298 int i = pmd_index(address);
262 299
263 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { 300 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
301 unsigned long pte_phys;
264 pmd_t *pmd = pmd_page + pmd_index(address); 302 pmd_t *pmd = pmd_page + pmd_index(address);
303 pte_t *pte;
265 304
266 if (address >= end) { 305 if (address >= end) {
267 if (!after_bootmem) { 306 if (!after_bootmem) {
@@ -271,12 +310,23 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
271 break; 310 break;
272 } 311 }
273 312
274 if (pmd_val(*pmd)) 313 if (pmd_val(*pmd)) {
314 phys_pte_update(pmd, address, end);
315 continue;
316 }
317
318 if (cpu_has_pse) {
319 pages++;
320 set_pte((pte_t *)pmd,
321 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
275 continue; 322 continue;
323 }
276 324
277 pages++; 325 pte = alloc_low_page(&pte_phys);
278 set_pte((pte_t *)pmd, 326 phys_pte_init(pte, address, end);
279 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 327 unmap_low_page(pte);
328
329 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
280 } 330 }
281 update_page_count(PG_LEVEL_2M, pages); 331 update_page_count(PG_LEVEL_2M, pages);
282 return address; 332 return address;
@@ -333,11 +383,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
333 pmd = alloc_low_page(&pmd_phys); 383 pmd = alloc_low_page(&pmd_phys);
334 384
335 spin_lock(&init_mm.page_table_lock); 385 spin_lock(&init_mm.page_table_lock);
336 pud_populate(&init_mm, pud, __va(pmd_phys));
337 last_map_addr = phys_pmd_init(pmd, addr, end); 386 last_map_addr = phys_pmd_init(pmd, addr, end);
387 unmap_low_page(pmd);
388 pud_populate(&init_mm, pud, __va(pmd_phys));
338 spin_unlock(&init_mm.page_table_lock); 389 spin_unlock(&init_mm.page_table_lock);
339 390
340 unmap_low_page(pmd);
341 } 391 }
342 __flush_tlb_all(); 392 __flush_tlb_all();
343 update_page_count(PG_LEVEL_1G, pages); 393 update_page_count(PG_LEVEL_1G, pages);
@@ -345,16 +395,30 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
345 return last_map_addr; 395 return last_map_addr;
346} 396}
347 397
398static unsigned long __meminit
399phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
400{
401 pud_t *pud;
402
403 pud = (pud_t *)pgd_page_vaddr(*pgd);
404
405 return phys_pud_init(pud, addr, end);
406}
407
348static void __init find_early_table_space(unsigned long end) 408static void __init find_early_table_space(unsigned long end)
349{ 409{
350 unsigned long puds, pmds, tables, start; 410 unsigned long puds, tables, start;
351 411
352 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 412 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
353 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); 413 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
354 if (!direct_gbpages) { 414 if (!direct_gbpages) {
355 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 415 unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
356 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 416 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
357 } 417 }
418 if (!cpu_has_pse) {
419 unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
420 tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
421 }
358 422
359 /* 423 /*
360 * RED-PEN putting page tables only on node 0 could 424 * RED-PEN putting page tables only on node 0 could
@@ -526,19 +590,25 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
526 unsigned long pud_phys; 590 unsigned long pud_phys;
527 pud_t *pud; 591 pud_t *pud;
528 592
593 next = start + PGDIR_SIZE;
594 if (next > end)
595 next = end;
596
597 if (pgd_val(*pgd)) {
598 last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
599 continue;
600 }
601
529 if (after_bootmem) 602 if (after_bootmem)
530 pud = pud_offset(pgd, start & PGDIR_MASK); 603 pud = pud_offset(pgd, start & PGDIR_MASK);
531 else 604 else
532 pud = alloc_low_page(&pud_phys); 605 pud = alloc_low_page(&pud_phys);
533 606
534 next = start + PGDIR_SIZE;
535 if (next > end)
536 next = end;
537 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); 607 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
608 unmap_low_page(pud);
538 if (!after_bootmem) 609 if (!after_bootmem)
539 pgd_populate(&init_mm, pgd_offset_k(start), 610 pgd_populate(&init_mm, pgd_offset_k(start),
540 __va(pud_phys)); 611 __va(pud_phys));
541 unmap_low_page(pud);
542 } 612 }
543 613
544 if (!after_bootmem) 614 if (!after_bootmem)