diff options
Diffstat (limited to 'arch/x86/mm/init_64.c')
-rw-r--r-- | arch/x86/mm/init_64.c | 110 |
1 files changed, 82 insertions, 28 deletions
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 770536ebf7e..fb30486c82f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -271,7 +271,8 @@ static __ref void unmap_low_page(void *adr) | |||
271 | } | 271 | } |
272 | 272 | ||
273 | static unsigned long __meminit | 273 | static unsigned long __meminit |
274 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | 274 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, |
275 | pgprot_t prot) | ||
275 | { | 276 | { |
276 | unsigned pages = 0; | 277 | unsigned pages = 0; |
277 | unsigned long last_map_addr = end; | 278 | unsigned long last_map_addr = end; |
@@ -289,36 +290,43 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | |||
289 | break; | 290 | break; |
290 | } | 291 | } |
291 | 292 | ||
293 | /* | ||
294 | * We will re-use the existing mapping. | ||
295 | * Xen for example has some special requirements, like mapping | ||
296 | * pagetable pages as RO. So assume someone who pre-setup | ||
297 | * these mappings are more intelligent. | ||
298 | */ | ||
292 | if (pte_val(*pte)) | 299 | if (pte_val(*pte)) |
293 | continue; | 300 | continue; |
294 | 301 | ||
295 | if (0) | 302 | if (0) |
296 | printk(" pte=%p addr=%lx pte=%016lx\n", | 303 | printk(" pte=%p addr=%lx pte=%016lx\n", |
297 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); | 304 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
298 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); | ||
299 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | ||
300 | pages++; | 305 | pages++; |
306 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); | ||
307 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | ||
301 | } | 308 | } |
309 | |||
302 | update_page_count(PG_LEVEL_4K, pages); | 310 | update_page_count(PG_LEVEL_4K, pages); |
303 | 311 | ||
304 | return last_map_addr; | 312 | return last_map_addr; |
305 | } | 313 | } |
306 | 314 | ||
307 | static unsigned long __meminit | 315 | static unsigned long __meminit |
308 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) | 316 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, |
317 | pgprot_t prot) | ||
309 | { | 318 | { |
310 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); | 319 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); |
311 | 320 | ||
312 | return phys_pte_init(pte, address, end); | 321 | return phys_pte_init(pte, address, end, prot); |
313 | } | 322 | } |
314 | 323 | ||
315 | static unsigned long __meminit | 324 | static unsigned long __meminit |
316 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | 325 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, |
317 | unsigned long page_size_mask) | 326 | unsigned long page_size_mask, pgprot_t prot) |
318 | { | 327 | { |
319 | unsigned long pages = 0; | 328 | unsigned long pages = 0; |
320 | unsigned long last_map_addr = end; | 329 | unsigned long last_map_addr = end; |
321 | unsigned long start = address; | ||
322 | 330 | ||
323 | int i = pmd_index(address); | 331 | int i = pmd_index(address); |
324 | 332 | ||
@@ -326,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
326 | unsigned long pte_phys; | 334 | unsigned long pte_phys; |
327 | pmd_t *pmd = pmd_page + pmd_index(address); | 335 | pmd_t *pmd = pmd_page + pmd_index(address); |
328 | pte_t *pte; | 336 | pte_t *pte; |
337 | pgprot_t new_prot = prot; | ||
329 | 338 | ||
330 | if (address >= end) { | 339 | if (address >= end) { |
331 | if (!after_bootmem) { | 340 | if (!after_bootmem) { |
@@ -339,27 +348,40 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
339 | if (!pmd_large(*pmd)) { | 348 | if (!pmd_large(*pmd)) { |
340 | spin_lock(&init_mm.page_table_lock); | 349 | spin_lock(&init_mm.page_table_lock); |
341 | last_map_addr = phys_pte_update(pmd, address, | 350 | last_map_addr = phys_pte_update(pmd, address, |
342 | end); | 351 | end, prot); |
343 | spin_unlock(&init_mm.page_table_lock); | 352 | spin_unlock(&init_mm.page_table_lock); |
353 | continue; | ||
344 | } | 354 | } |
345 | /* Count entries we're using from level2_ident_pgt */ | 355 | /* |
346 | if (start == 0) | 356 | * If we are ok with PG_LEVEL_2M mapping, then we will |
347 | pages++; | 357 | * use the existing mapping, |
348 | continue; | 358 | * |
359 | * Otherwise, we will split the large page mapping but | ||
360 | * use the same existing protection bits except for | ||
361 | * large page, so that we don't violate Intel's TLB | ||
362 | * Application note (317080) which says, while changing | ||
363 | * the page sizes, new and old translations should | ||
364 | * not differ with respect to page frame and | ||
365 | * attributes. | ||
366 | */ | ||
367 | if (page_size_mask & (1 << PG_LEVEL_2M)) | ||
368 | continue; | ||
369 | new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); | ||
349 | } | 370 | } |
350 | 371 | ||
351 | if (page_size_mask & (1<<PG_LEVEL_2M)) { | 372 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
352 | pages++; | 373 | pages++; |
353 | spin_lock(&init_mm.page_table_lock); | 374 | spin_lock(&init_mm.page_table_lock); |
354 | set_pte((pte_t *)pmd, | 375 | set_pte((pte_t *)pmd, |
355 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 376 | pfn_pte(address >> PAGE_SHIFT, |
377 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
356 | spin_unlock(&init_mm.page_table_lock); | 378 | spin_unlock(&init_mm.page_table_lock); |
357 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; | 379 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; |
358 | continue; | 380 | continue; |
359 | } | 381 | } |
360 | 382 | ||
361 | pte = alloc_low_page(&pte_phys); | 383 | pte = alloc_low_page(&pte_phys); |
362 | last_map_addr = phys_pte_init(pte, address, end); | 384 | last_map_addr = phys_pte_init(pte, address, end, new_prot); |
363 | unmap_low_page(pte); | 385 | unmap_low_page(pte); |
364 | 386 | ||
365 | spin_lock(&init_mm.page_table_lock); | 387 | spin_lock(&init_mm.page_table_lock); |
@@ -372,12 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
372 | 394 | ||
373 | static unsigned long __meminit | 395 | static unsigned long __meminit |
374 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, | 396 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, |
375 | unsigned long page_size_mask) | 397 | unsigned long page_size_mask, pgprot_t prot) |
376 | { | 398 | { |
377 | pmd_t *pmd = pmd_offset(pud, 0); | 399 | pmd_t *pmd = pmd_offset(pud, 0); |
378 | unsigned long last_map_addr; | 400 | unsigned long last_map_addr; |
379 | 401 | ||
380 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); | 402 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); |
381 | __flush_tlb_all(); | 403 | __flush_tlb_all(); |
382 | return last_map_addr; | 404 | return last_map_addr; |
383 | } | 405 | } |
@@ -394,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
394 | unsigned long pmd_phys; | 416 | unsigned long pmd_phys; |
395 | pud_t *pud = pud_page + pud_index(addr); | 417 | pud_t *pud = pud_page + pud_index(addr); |
396 | pmd_t *pmd; | 418 | pmd_t *pmd; |
419 | pgprot_t prot = PAGE_KERNEL; | ||
397 | 420 | ||
398 | if (addr >= end) | 421 | if (addr >= end) |
399 | break; | 422 | break; |
@@ -405,10 +428,26 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
405 | } | 428 | } |
406 | 429 | ||
407 | if (pud_val(*pud)) { | 430 | if (pud_val(*pud)) { |
408 | if (!pud_large(*pud)) | 431 | if (!pud_large(*pud)) { |
409 | last_map_addr = phys_pmd_update(pud, addr, end, | 432 | last_map_addr = phys_pmd_update(pud, addr, end, |
410 | page_size_mask); | 433 | page_size_mask, prot); |
411 | continue; | 434 | continue; |
435 | } | ||
436 | /* | ||
437 | * If we are ok with PG_LEVEL_1G mapping, then we will | ||
438 | * use the existing mapping. | ||
439 | * | ||
440 | * Otherwise, we will split the gbpage mapping but use | ||
441 | * the same existing protection bits except for large | ||
442 | * page, so that we don't violate Intel's TLB | ||
443 | * Application note (317080) which says, while changing | ||
444 | * the page sizes, new and old translations should | ||
445 | * not differ with respect to page frame and | ||
446 | * attributes. | ||
447 | */ | ||
448 | if (page_size_mask & (1 << PG_LEVEL_1G)) | ||
449 | continue; | ||
450 | prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); | ||
412 | } | 451 | } |
413 | 452 | ||
414 | if (page_size_mask & (1<<PG_LEVEL_1G)) { | 453 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
@@ -422,7 +461,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
422 | } | 461 | } |
423 | 462 | ||
424 | pmd = alloc_low_page(&pmd_phys); | 463 | pmd = alloc_low_page(&pmd_phys); |
425 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); | 464 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, |
465 | prot); | ||
426 | unmap_low_page(pmd); | 466 | unmap_low_page(pmd); |
427 | 467 | ||
428 | spin_lock(&init_mm.page_table_lock); | 468 | spin_lock(&init_mm.page_table_lock); |
@@ -430,6 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
430 | spin_unlock(&init_mm.page_table_lock); | 470 | spin_unlock(&init_mm.page_table_lock); |
431 | } | 471 | } |
432 | __flush_tlb_all(); | 472 | __flush_tlb_all(); |
473 | |||
433 | update_page_count(PG_LEVEL_1G, pages); | 474 | update_page_count(PG_LEVEL_1G, pages); |
434 | 475 | ||
435 | return last_map_addr; | 476 | return last_map_addr; |
@@ -446,13 +487,14 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
446 | return phys_pud_init(pud, addr, end, page_size_mask); | 487 | return phys_pud_init(pud, addr, end, page_size_mask); |
447 | } | 488 | } |
448 | 489 | ||
449 | static void __init find_early_table_space(unsigned long end) | 490 | static void __init find_early_table_space(unsigned long end, int use_pse, |
491 | int use_gbpages) | ||
450 | { | 492 | { |
451 | unsigned long puds, pmds, ptes, tables, start; | 493 | unsigned long puds, pmds, ptes, tables, start; |
452 | 494 | ||
453 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 495 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
454 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 496 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); |
455 | if (direct_gbpages) { | 497 | if (use_gbpages) { |
456 | unsigned long extra; | 498 | unsigned long extra; |
457 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | 499 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); |
458 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | 500 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; |
@@ -460,7 +502,7 @@ static void __init find_early_table_space(unsigned long end) | |||
460 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 502 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
461 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 503 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); |
462 | 504 | ||
463 | if (cpu_has_pse) { | 505 | if (use_pse) { |
464 | unsigned long extra; | 506 | unsigned long extra; |
465 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | 507 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); |
466 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | 508 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; |
@@ -528,6 +570,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, | |||
528 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 570 | pgd_populate(&init_mm, pgd, __va(pud_phys)); |
529 | spin_unlock(&init_mm.page_table_lock); | 571 | spin_unlock(&init_mm.page_table_lock); |
530 | } | 572 | } |
573 | __flush_tlb_all(); | ||
531 | 574 | ||
532 | return last_map_addr; | 575 | return last_map_addr; |
533 | } | 576 | } |
@@ -571,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
571 | 614 | ||
572 | struct map_range mr[NR_RANGE_MR]; | 615 | struct map_range mr[NR_RANGE_MR]; |
573 | int nr_range, i; | 616 | int nr_range, i; |
617 | int use_pse, use_gbpages; | ||
574 | 618 | ||
575 | printk(KERN_INFO "init_memory_mapping\n"); | 619 | printk(KERN_INFO "init_memory_mapping\n"); |
576 | 620 | ||
@@ -584,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
584 | if (!after_bootmem) | 628 | if (!after_bootmem) |
585 | init_gbpages(); | 629 | init_gbpages(); |
586 | 630 | ||
587 | if (direct_gbpages) | 631 | #ifdef CONFIG_DEBUG_PAGEALLOC |
632 | /* | ||
633 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
634 | * This will simplify cpa(), which otherwise needs to support splitting | ||
635 | * large pages into small in interrupt context, etc. | ||
636 | */ | ||
637 | use_pse = use_gbpages = 0; | ||
638 | #else | ||
639 | use_pse = cpu_has_pse; | ||
640 | use_gbpages = direct_gbpages; | ||
641 | #endif | ||
642 | |||
643 | if (use_gbpages) | ||
588 | page_size_mask |= 1 << PG_LEVEL_1G; | 644 | page_size_mask |= 1 << PG_LEVEL_1G; |
589 | if (cpu_has_pse) | 645 | if (use_pse) |
590 | page_size_mask |= 1 << PG_LEVEL_2M; | 646 | page_size_mask |= 1 << PG_LEVEL_2M; |
591 | 647 | ||
592 | memset(mr, 0, sizeof(mr)); | 648 | memset(mr, 0, sizeof(mr)); |
@@ -647,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
647 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 703 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
648 | 704 | ||
649 | if (!after_bootmem) | 705 | if (!after_bootmem) |
650 | find_early_table_space(end); | 706 | find_early_table_space(end, use_pse, use_gbpages); |
651 | 707 | ||
652 | for (i = 0; i < nr_range; i++) | 708 | for (i = 0; i < nr_range; i++) |
653 | last_map_addr = kernel_physical_mapping_init( | 709 | last_map_addr = kernel_physical_mapping_init( |
@@ -806,8 +862,6 @@ void __init mem_init(void) | |||
806 | reservedpages << (PAGE_SHIFT-10), | 862 | reservedpages << (PAGE_SHIFT-10), |
807 | datasize >> 10, | 863 | datasize >> 10, |
808 | initsize >> 10); | 864 | initsize >> 10); |
809 | |||
810 | cpa_init(); | ||
811 | } | 865 | } |
812 | 866 | ||
813 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 867 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |