author		Linus Torvalds <torvalds@linux-foundation.org>	2008-10-11 13:59:00 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-11 14:02:56 -0400
commit		ec8deffa33757286ba59e71d3d98173c37638b37 (patch)
tree		2a176fd94d4665fe3c03f6e9f09dda8060fdb4a7 /arch/x86/mm/init_64.c
parent		7cc4e87f912bbefa440a51856b8d076e5d1f554a (diff)
parent		3dd392a407d15250a501fa109cc1f93fee95ef85 (diff)
Merge phase #2 (PAT updates) of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-v28-for-linus-phase2-B' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
  x86, cpa: make the kernel physical mapping initialization a two pass sequence, fix
  x86, pat: cleanups
  x86: fix pagetable init 64-bit breakage
  x86: track memtype for RAM in page struct
  x86, cpa: srlz cpa(), global flush tlb after splitting big page and before doing cpa
  x86, cpa: remove cpa pool code
  x86, cpa: no need to check alias for __set_pages_p/__set_pages_np
  x86, cpa: dont use large pages for kernel identity mapping with DEBUG_PAGEALLOC
  x86, cpa: make the kernel physical mapping initialization a two pass sequence
  x86, cpa: remove USER permission from the very early identity mapping attribute
  x86, cpa: rename PTE attribute macros for kernel direct mapping in early boot
  x86: make sure the CPA test code's use of _PAGE_UNUSED1 is obvious
  linux-next: fix x86 tree build failure
  x86: have set_memory_array_{uc,wb} coalesce memtypes, fix
  agp: enable optimized agp_alloc_pages methods
  x86: have set_memory_array_{uc,wb} coalesce memtypes.
  x86: {reverve,free}_memtype() take a physical address
  x86: fix pageattr-test
  agp: add agp_generic_destroy_pages()
  agp: generic_alloc_pages()
  ...
Diffstat (limited to 'arch/x86/mm/init_64.c')
-rw-r--r--	arch/x86/mm/init_64.c	110
1 file changed, 82 insertions, 28 deletions
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 770536ebf7e..fb30486c82f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -271,7 +271,8 @@ static __ref void unmap_low_page(void *adr)
 }
 
 static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+	      pgprot_t prot)
 {
 	unsigned pages = 0;
 	unsigned long last_map_addr = end;
@@ -289,36 +290,43 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 			break;
 		}
 
+		/*
+		 * We will re-use the existing mapping.
+		 * Xen for example has some special requirements, like mapping
+		 * pagetable pages as RO. So assume someone who pre-setup
+		 * these mappings are more intelligent.
+		 */
 		if (pte_val(*pte))
 			continue;
 
 		if (0)
 			printk(" pte=%p addr=%lx pte=%016lx\n",
 			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
-		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
-		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 		pages++;
+		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
+		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 	}
+
 	update_page_count(PG_LEVEL_4K, pages);
 
 	return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+	pgprot_t prot)
 {
 	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
 
-	return phys_pte_init(pte, address, end);
+	return phys_pte_init(pte, address, end, prot);
 }
 
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, pgprot_t prot)
 {
 	unsigned long pages = 0;
 	unsigned long last_map_addr = end;
-	unsigned long start = address;
 
 	int i = pmd_index(address);
 
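The comment added above explains why already-populated PTEs are skipped: a hypervisor such as Xen may have pre-set some entries (for example, mapping page-table pages read-only), and this init pass must not clobber them. A minimal standalone C sketch of that "only fill empty slots" rule, using a toy 8-entry table and made-up entry values rather than real page-table bits:

#include <stdio.h>

#define NENTRIES 8

int main(void)
{
	unsigned long table[NENTRIES] = { 0 };
	unsigned long filled = 0;
	int i;

	/* Pretend slot 2 was pre-populated elsewhere (e.g. by a hypervisor). */
	table[2] = 0x2000UL | 1UL;

	for (i = 0; i < NENTRIES; i++) {
		if (table[i])		/* same idea as "if (pte_val(*pte)) continue;" */
			continue;
		table[i] = ((unsigned long)i << 12) | 1UL;
		filled++;
	}

	printf("filled %lu of %d entries; slot 2 kept as %#lx\n",
	       filled, NENTRIES, table[2]);
	return 0;
}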
@@ -326,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 		unsigned long pte_phys;
 		pmd_t *pmd = pmd_page + pmd_index(address);
 		pte_t *pte;
+		pgprot_t new_prot = prot;
 
 		if (address >= end) {
 			if (!after_bootmem) {
@@ -339,27 +348,40 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
 				last_map_addr = phys_pte_update(pmd, address,
-								end);
+								end, prot);
 				spin_unlock(&init_mm.page_table_lock);
+				continue;
 			}
-			/* Count entries we're using from level2_ident_pgt */
-			if (start == 0)
-				pages++;
-			continue;
+			/*
+			 * If we are ok with PG_LEVEL_2M mapping, then we will
+			 * use the existing mapping,
+			 *
+			 * Otherwise, we will split the large page mapping but
+			 * use the same existing protection bits except for
+			 * large page, so that we don't violate Intel's TLB
+			 * Application note (317080) which says, while changing
+			 * the page sizes, new and old translations should
+			 * not differ with respect to page frame and
+			 * attributes.
+			 */
+			if (page_size_mask & (1 << PG_LEVEL_2M))
+				continue;
+			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
 		}
 
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
 			set_pte((pte_t *)pmd,
-				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+				pfn_pte(address >> PAGE_SHIFT,
+					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
 			spin_unlock(&init_mm.page_table_lock);
 			last_map_addr = (address & PMD_MASK) + PMD_SIZE;
 			continue;
 		}
 
 		pte = alloc_low_page(&pte_phys);
-		last_map_addr = phys_pte_init(pte, address, end);
+		last_map_addr = phys_pte_init(pte, address, end, new_prot);
 		unmap_low_page(pte);
 
 		spin_lock(&init_mm.page_table_lock);
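The new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)) step above is what keeps a split consistent with Intel's application note 317080: the 4k entries that replace a 2M mapping must carry the same attributes, minus the large-page bit. A standalone sketch of that rule, using made-up flag constants rather than the kernel's _PAGE_* definitions:

#include <stdio.h>

/* Illustrative stand-ins; these are NOT the kernel's _PAGE_* values. */
#define F_PRESENT	(1ULL << 0)
#define F_RW		(1ULL << 1)
#define F_PSE		(1ULL << 7)	/* "this is a large page" bit */
#define F_NX		(1ULL << 63)
#define ADDR_MASK	0x000ffffffffff000ULL

/* Rough analogue of pte_pgprot(pte_clrhuge(entry)): drop the frame address
 * and the large-page bit, keep every other attribute bit as-is. */
static unsigned long long prot_for_split(unsigned long long large_entry)
{
	return (large_entry & ~ADDR_MASK) & ~F_PSE;
}

int main(void)
{
	unsigned long long pmd = 0x200000ULL | F_PRESENT | F_RW | F_NX | F_PSE;
	unsigned long long prot = prot_for_split(pmd);

	/* The 4k entries replacing the 2M mapping inherit RW/NX/etc., so old
	 * and new translations differ only in page size, not in attributes. */
	printf("2M entry %#llx -> prot for 4k entries %#llx\n", pmd, prot);
	return 0;
}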
@@ -372,12 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 
 static unsigned long __meminit
 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-		unsigned long page_size_mask)
+		unsigned long page_size_mask, pgprot_t prot)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long last_map_addr;
 
-	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
+	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
 	__flush_tlb_all();
 	return last_map_addr;
 }
@@ -394,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		unsigned long pmd_phys;
 		pud_t *pud = pud_page + pud_index(addr);
 		pmd_t *pmd;
+		pgprot_t prot = PAGE_KERNEL;
 
 		if (addr >= end)
 			break;
@@ -405,10 +428,26 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		}
 
 		if (pud_val(*pud)) {
-			if (!pud_large(*pud))
+			if (!pud_large(*pud)) {
 				last_map_addr = phys_pmd_update(pud, addr, end,
-							 page_size_mask);
-			continue;
+							 page_size_mask, prot);
+				continue;
+			}
+			/*
+			 * If we are ok with PG_LEVEL_1G mapping, then we will
+			 * use the existing mapping.
+			 *
+			 * Otherwise, we will split the gbpage mapping but use
+			 * the same existing protection bits except for large
+			 * page, so that we don't violate Intel's TLB
+			 * Application note (317080) which says, while changing
+			 * the page sizes, new and old translations should
+			 * not differ with respect to page frame and
+			 * attributes.
+			 */
+			if (page_size_mask & (1 << PG_LEVEL_1G))
+				continue;
+			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
 		}
 
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
@@ -422,7 +461,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		}
 
 		pmd = alloc_low_page(&pmd_phys);
-		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
+		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
+					      prot);
 		unmap_low_page(pmd);
 
 		spin_lock(&init_mm.page_table_lock);
@@ -430,6 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	__flush_tlb_all();
+
 	update_page_count(PG_LEVEL_1G, pages);
 
 	return last_map_addr;
@@ -446,13 +487,14 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return phys_pud_init(pud, addr, end, page_size_mask);
 }
 
-static void __init find_early_table_space(unsigned long end)
+static void __init find_early_table_space(unsigned long end, int use_pse,
+					  int use_gbpages)
 {
 	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
 	tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-	if (direct_gbpages) {
+	if (use_gbpages) {
 		unsigned long extra;
 		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
 		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
@@ -460,7 +502,7 @@ static void __init find_early_table_space(unsigned long end)
 		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
 	tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
 
-	if (cpu_has_pse) {
+	if (use_pse) {
 		unsigned long extra;
 		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
 		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
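Passing use_pse/use_gbpages into find_early_table_space() lets the early allocator size the page tables for the mapping mode that will actually be used: with large pages enabled, only the unaligned tail of the range needs PMD or PTE tables. A rough standalone model of that arithmetic, assuming the usual x86-64 shift values and 8-byte table entries:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SHIFT  21
#define PUD_SHIFT  30
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PMD_SIZE   (1ULL << PMD_SHIFT)
#define PUD_SIZE   (1ULL << PUD_SHIFT)

static unsigned long long roundup_page(unsigned long long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

/* Mirrors the use_pse/use_gbpages idea: only the tail that cannot be
 * covered by large pages needs lower-level tables. */
static unsigned long long table_space(unsigned long long end,
				      int use_pse, int use_gbpages)
{
	unsigned long long puds, pmds, ptes, tables;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	tables = roundup_page(puds * 8);

	if (use_gbpages)
		pmds = ((end - ((end >> PUD_SHIFT) << PUD_SHIFT)) + PMD_SIZE - 1) >> PMD_SHIFT;
	else
		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables += roundup_page(pmds * 8);

	if (use_pse)
		ptes = ((end - ((end >> PMD_SHIFT) << PMD_SHIFT)) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	else
		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	tables += roundup_page(ptes * 8);

	return tables;
}

int main(void)
{
	unsigned long long end = 4ULL << 30;	/* map the first 4 GB */

	printf("4k only : %llu KB of tables\n", table_space(end, 0, 0) >> 10);
	printf("2M pages: %llu KB of tables\n", table_space(end, 1, 0) >> 10);
	return 0;
}

For the first 4 GB this drops the estimate from roughly 8 MB of 4k page tables to a few pages' worth, which is why the decision has to be made before the diff's kernel_physical_mapping_init() loop runs.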
@@ -528,6 +570,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 		pgd_populate(&init_mm, pgd, __va(pud_phys));
 		spin_unlock(&init_mm.page_table_lock);
 	}
+	__flush_tlb_all();
 
 	return last_map_addr;
 }
@@ -571,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	struct map_range mr[NR_RANGE_MR];
 	int nr_range, i;
+	int use_pse, use_gbpages;
 
 	printk(KERN_INFO "init_memory_mapping\n");
 
@@ -584,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	if (!after_bootmem)
 		init_gbpages();
 
-	if (direct_gbpages)
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+	 * This will simplify cpa(), which otherwise needs to support splitting
+	 * large pages into small in interrupt context, etc.
+	 */
+	use_pse = use_gbpages = 0;
+#else
+	use_pse = cpu_has_pse;
+	use_gbpages = direct_gbpages;
+#endif
+
+	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
-	if (cpu_has_pse)
+	if (use_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 
 	memset(mr, 0, sizeof(mr));
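With CONFIG_DEBUG_PAGEALLOC the mask above simply never has the 2M/1G bits set, so the identity mapping is built from 4k pages and cpa() never has to split a large page at runtime. A small sketch of how such a mask is built and consulted; the PG_LEVEL_* numbering here is illustrative, not taken from the kernel headers:

#include <stdio.h>

enum pg_level { PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

static unsigned long build_page_size_mask(int use_pse, int use_gbpages)
{
	unsigned long mask = 0;

	/* Forcing both flags to 0 (as the DEBUG_PAGEALLOC branch does)
	 * leaves the mask empty, i.e. "map everything with 4k pages". */
	if (use_gbpages)
		mask |= 1UL << PG_LEVEL_1G;
	if (use_pse)
		mask |= 1UL << PG_LEVEL_2M;
	return mask;
}

int main(void)
{
	unsigned long mask = build_page_size_mask(1, 0);

	if (mask & (1UL << PG_LEVEL_2M))
		printf("would map this range with 2M pages\n");
	else
		printf("falling back to 4k pages\n");
	return 0;
}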
@@ -647,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
 
 	if (!after_bootmem)
-		find_early_table_space(end);
+		find_early_table_space(end, use_pse, use_gbpages);
 
 	for (i = 0; i < nr_range; i++)
 		last_map_addr = kernel_physical_mapping_init(
@@ -806,8 +862,6 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
-
-	cpa_init();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)