Diffstat (limited to 'arch')
-rw-r--r--   arch/x86/mm/init_64.c   149
1 file changed, 61 insertions, 88 deletions
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6116ff0d7416..8c7eae490a2c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -270,10 +270,9 @@ static __ref void unmap_low_page(void *adr)
         early_iounmap(adr, PAGE_SIZE);
 }
 
-static int physical_mapping_iter;
-
 static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+              pgprot_t prot)
 {
         unsigned pages = 0;
         unsigned long last_map_addr = end;
@@ -291,35 +290,40 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
                         break;
                 }
 
+                /*
+                 * We will re-use the existing mapping.
+                 * Xen for example has some special requirements, like mapping
+                 * pagetable pages as RO. So assume someone who pre-setup
+                 * these mappings are more intelligent.
+                 */
                 if (pte_val(*pte))
-                        goto repeat_set_pte;
+                        continue;
 
                 if (0)
                         printk("   pte=%p addr=%lx pte=%016lx\n",
                                pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
                 pages++;
-repeat_set_pte:
-                set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+                set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
                 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
         }
 
-        if (physical_mapping_iter == 1)
-                update_page_count(PG_LEVEL_4K, pages);
+        update_page_count(PG_LEVEL_4K, pages);
 
         return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+                pgprot_t prot)
 {
         pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
 
-        return phys_pte_init(pte, address, end);
+        return phys_pte_init(pte, address, end, prot);
 }
 
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
-              unsigned long page_size_mask)
+              unsigned long page_size_mask, pgprot_t prot)
 {
         unsigned long pages = 0;
         unsigned long last_map_addr = end;
@@ -330,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                 unsigned long pte_phys;
                 pmd_t *pmd = pmd_page + pmd_index(address);
                 pte_t *pte;
+                pgprot_t new_prot = prot;
 
                 if (address >= end) {
                         if (!after_bootmem) {
@@ -343,45 +348,58 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                         if (!pmd_large(*pmd)) {
                                 spin_lock(&init_mm.page_table_lock);
                                 last_map_addr = phys_pte_update(pmd, address,
-                                                                end);
+                                                                end, prot);
                                 spin_unlock(&init_mm.page_table_lock);
                                 continue;
                         }
-                        goto repeat_set_pte;
+                        /*
+                         * If we are ok with PG_LEVEL_2M mapping, then we will
+                         * use the existing mapping,
+                         *
+                         * Otherwise, we will split the large page mapping but
+                         * use the same existing protection bits except for
+                         * large page, so that we don't violate Intel's TLB
+                         * Application note (317080) which says, while changing
+                         * the page sizes, new and old translations should
+                         * not differ with respect to page frame and
+                         * attributes.
+                         */
+                        if (page_size_mask & (1 << PG_LEVEL_2M))
+                                continue;
+                        new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
                 }
 
                 if (page_size_mask & (1<<PG_LEVEL_2M)) {
                         pages++;
-repeat_set_pte:
                         spin_lock(&init_mm.page_table_lock);
                         set_pte((pte_t *)pmd,
-                                pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                                pfn_pte(address >> PAGE_SHIFT,
+                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                         spin_unlock(&init_mm.page_table_lock);
                         last_map_addr = (address & PMD_MASK) + PMD_SIZE;
                         continue;
                 }
 
                 pte = alloc_low_page(&pte_phys);
-                last_map_addr = phys_pte_init(pte, address, end);
+                last_map_addr = phys_pte_init(pte, address, end, new_prot);
                 unmap_low_page(pte);
 
                 spin_lock(&init_mm.page_table_lock);
                 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
                 spin_unlock(&init_mm.page_table_lock);
         }
-        if (physical_mapping_iter == 1)
-                update_page_count(PG_LEVEL_2M, pages);
+        update_page_count(PG_LEVEL_2M, pages);
         return last_map_addr;
 }
 
 static unsigned long __meminit
 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-                unsigned long page_size_mask)
+                unsigned long page_size_mask, pgprot_t prot)
 {
         pmd_t *pmd = pmd_offset(pud, 0);
         unsigned long last_map_addr;
 
-        last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
+        last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
         __flush_tlb_all();
         return last_map_addr;
 }
@@ -398,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                 unsigned long pmd_phys;
                 pud_t *pud = pud_page + pud_index(addr);
                 pmd_t *pmd;
+                pgprot_t prot = PAGE_KERNEL;
 
                 if (addr >= end)
                         break;
@@ -411,16 +430,28 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                 if (pud_val(*pud)) {
                         if (!pud_large(*pud)) {
                                 last_map_addr = phys_pmd_update(pud, addr, end,
-                                                        page_size_mask);
+                                                        page_size_mask, prot);
                                 continue;
                         }
-
-                        goto repeat_set_pte;
+                        /*
+                         * If we are ok with PG_LEVEL_1G mapping, then we will
+                         * use the existing mapping.
+                         *
+                         * Otherwise, we will split the gbpage mapping but use
+                         * the same existing protection bits except for large
+                         * page, so that we don't violate Intel's TLB
+                         * Application note (317080) which says, while changing
+                         * the page sizes, new and old translations should
+                         * not differ with respect to page frame and
+                         * attributes.
+                         */
+                        if (page_size_mask & (1 << PG_LEVEL_1G))
+                                continue;
+                        prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
                 }
 
                 if (page_size_mask & (1<<PG_LEVEL_1G)) {
                         pages++;
-repeat_set_pte:
                         spin_lock(&init_mm.page_table_lock);
                         set_pte((pte_t *)pud,
                                 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -430,7 +461,8 @@ repeat_set_pte:
                 }
 
                 pmd = alloc_low_page(&pmd_phys);
-                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
+                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
+                                              prot);
                 unmap_low_page(pmd);
 
                 spin_lock(&init_mm.page_table_lock);
@@ -439,8 +471,7 @@ repeat_set_pte:
         }
         __flush_tlb_all();
 
-        if (physical_mapping_iter == 1)
-                update_page_count(PG_LEVEL_1G, pages);
+        update_page_count(PG_LEVEL_1G, pages);
 
         return last_map_addr;
 }
@@ -505,54 +536,15 @@ static void __init init_gbpages(void)
                 direct_gbpages = 0;
 }
 
-static int is_kernel(unsigned long pfn)
-{
-        unsigned long pg_addresss = pfn << PAGE_SHIFT;
-
-        if (pg_addresss >= (unsigned long) __pa(_text) &&
-            pg_addresss < (unsigned long) __pa(_end))
-                return 1;
-
-        return 0;
-}
-
 static unsigned long __init kernel_physical_mapping_init(unsigned long start,
                                                 unsigned long end,
                                                 unsigned long page_size_mask)
 {
 
-        unsigned long next, last_map_addr;
-        u64 cached_supported_pte_mask = __supported_pte_mask;
-        unsigned long cache_start = start;
-        unsigned long cache_end = end;
-
-        /*
-         * First iteration will setup identity mapping using large/small pages
-         * based on page_size_mask, with other attributes same as set by
-         * the early code in head_64.S
-         *
-         * Second iteration will setup the appropriate attributes
-         * as desired for the kernel identity mapping.
-         *
-         * This two pass mechanism conforms to the TLB app note which says:
-         *
-         *     "Software should not write to a paging-structure entry in a way
-         *      that would change, for any linear address, both the page size
-         *      and either the page frame or attributes."
-         *
-         * For now, only difference between very early PTE attributes used in
-         * head_64.S and here is _PAGE_NX.
-         */
-        BUILD_BUG_ON((__PAGE_KERNEL_LARGE & ~__PAGE_KERNEL_IDENT_LARGE_EXEC)
-                     != _PAGE_NX);
-        __supported_pte_mask &= ~(_PAGE_NX);
-        physical_mapping_iter = 1;
+        unsigned long next, last_map_addr = end;
 
-repeat:
-        last_map_addr = cache_end;
-
-        start = (unsigned long)__va(cache_start);
-        end = (unsigned long)__va(cache_end);
+        start = (unsigned long)__va(start);
+        end = (unsigned long)__va(end);
 
         for (; start < end; start = next) {
                 pgd_t *pgd = pgd_offset_k(start);
@@ -564,21 +556,11 @@ repeat:
                         next = end;
 
                 if (pgd_val(*pgd)) {
-                        /*
-                         * Static identity mappings will be overwritten
-                         * with run-time mappings. For example, this allows
-                         * the static 0-1GB identity mapping to be mapped
-                         * non-executable with this.
-                         */
-                        if (is_kernel(pte_pfn(*((pte_t *) pgd))))
-                                goto realloc;
-
                         last_map_addr = phys_pud_update(pgd, __pa(start),
                                                  __pa(end), page_size_mask);
                         continue;
                 }
 
-realloc:
                 pud = alloc_low_page(&pud_phys);
                 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
                                                  page_size_mask);
@@ -590,15 +572,6 @@ realloc:
         }
         __flush_tlb_all();
 
-        if (physical_mapping_iter == 1) {
-                physical_mapping_iter = 2;
-                /*
-                 * Second iteration will set the actual desired PTE attributes.
-                 */
-                __supported_pte_mask = cached_supported_pte_mask;
-                goto repeat;
-        }
-
         return last_map_addr;
 }
 
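The comments added in phys_pmd_init() and phys_pud_init() above both point at Intel's TLB application note (317080): when an existing 2M or 1G mapping is split, the new lower-level entries must keep the same page frame and attributes, differing only in page size. That is what pte_pgprot(pte_clrhuge(...)) and __pgprot(pgprot_val(prot) | _PAGE_PSE) implement in the patch. The stand-alone sketch below is only a user-space model of that rule, not kernel code; the MODEL_* constants and helper names are invented for illustration (the bit values follow the usual x86 layout, but only the PSE bit matters here).

#include <stdio.h>
#include <stdint.h>

/* Toy model of x86 paging attributes; these are local stand-ins, not the
 * kernel's _PAGE_* macros. */
#define MODEL_PRESENT  0x001ULL
#define MODEL_RW       0x002ULL
#define MODEL_PSE      0x080ULL          /* large-page (2M/1G) bit */
#define MODEL_GLOBAL   0x100ULL
#define MODEL_NX       (1ULL << 63)

typedef uint64_t model_prot_t;

/* Splitting a large page: keep every attribute except PSE, which is what
 * pte_pgprot(pte_clrhuge(*(pte_t *)pmd)) extracts in the patch. */
static model_prot_t prot_for_split(model_prot_t large_flags)
{
        return large_flags & ~MODEL_PSE;
}

/* Writing a large entry from an inherited protection: add only PSE, like
 * __pgprot(pgprot_val(prot) | _PAGE_PSE). */
static model_prot_t prot_for_large(model_prot_t prot)
{
        return prot | MODEL_PSE;
}

int main(void)
{
        /* An existing 2M kernel mapping: present, writable, global, NX. */
        model_prot_t old_2m = MODEL_PRESENT | MODEL_RW | MODEL_GLOBAL |
                              MODEL_NX | MODEL_PSE;

        model_prot_t new_4k  = prot_for_split(old_2m);
        model_prot_t back_2m = prot_for_large(new_4k);

        printf("old 2M flags : %#018llx\n", (unsigned long long)old_2m);
        printf("new 4K flags : %#018llx\n", (unsigned long long)new_4k);
        printf("rebuilt 2M   : %#018llx\n", (unsigned long long)back_2m);

        /* Old and new translations differ only in the PSE bit, i.e. only in
         * page size, which is exactly what the application note requires. */
        return (old_2m ^ new_4k) == MODEL_PSE ? 0 : 1;
}

Because attributes are preserved in both directions of the conversion, the mapping can be built in a single pass; the removed two-pass scheme existed only to avoid changing page size and attributes in the same write.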

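The other half of the change is plumbing: phys_pud_init() now starts each entry from PAGE_KERNEL, threads a pgprot_t down through phys_pmd_update()/phys_pmd_init() and phys_pte_update() to phys_pte_init(), and swaps in the protection inherited from an existing large mapping only when that mapping has to be split. A minimal model of that flow, with hypothetical model_* names standing in for the kernel helpers (no real page tables are touched):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t model_prot_t;

#define MODEL_KERNEL  0x163ULL   /* stand-in for PAGE_KERNEL */
#define MODEL_PSE     0x080ULL   /* stand-in for _PAGE_PSE   */

/* Leaf level: 4K entries are written with exactly the protection passed in,
 * as phys_pte_init() now does with its new 'prot' argument. */
static void model_pte_init(model_prot_t prot)
{
        printf("  4K entries written with prot %#llx\n",
               (unsigned long long)prot);
}

/* Middle level: pass the protection through unchanged, unless an existing
 * 2M mapping must be split, in which case inherit its attributes minus PSE. */
static void model_pmd_init(int split_existing_2m, model_prot_t prot)
{
        model_prot_t new_prot = prot;

        if (split_existing_2m) {
                model_prot_t old_2m = MODEL_KERNEL | MODEL_PSE;

                new_prot = old_2m & ~MODEL_PSE;
        }
        model_pte_init(new_prot);
}

/* Top level: the default protection originates here, as in phys_pud_init(). */
static void model_pud_init(void)
{
        model_prot_t prot = MODEL_KERNEL;

        model_pmd_init(0, prot);   /* fresh mapping                       */
        model_pmd_init(1, prot);   /* splitting a pre-existing 2M mapping */
}

int main(void)
{
        model_pud_init();
        return 0;
}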