diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 20:32:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 20:32:28 -0400 |
commit | 77cd3d0c43b7e6c0bb49ca641cf936891f6e1766 (patch) | |
tree | 44885bb7ec9def5a34b8e9f2073a166f78bddcf9 /arch/x86/mm | |
parent | 0f657262d5f99ad86b9a63fb5dcd29036c2ed916 (diff) | |
parent | 6a79296cb15d947bcb4558011fe066e5d8252b35 (diff) |
Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot updates from Ingo Molnar:
"The main changes:
- add initial commits to randomize kernel memory section virtual
addresses, enabled via a new kernel option: RANDOMIZE_MEMORY
(Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu)
- enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees
Cook)
- EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar)
- misc cleanups/fixes"
* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/boot: Simplify EBDA-vs-BIOS reservation logic
x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does
x86/boot: Reorganize and clean up the BIOS area reservation code
x86/mm: Do not reference phys addr beyond kernel
x86/mm: Add memory hotplug support for KASLR memory randomization
x86/mm: Enable KASLR for vmalloc memory regions
x86/mm: Enable KASLR for physical mapping memory regions
x86/mm: Implement ASLR for kernel memory regions
x86/mm: Separate variable for trampoline PGD
x86/mm: Add PUD VA support for physical mapping
x86/mm: Update physical mapping variable names
x86/mm: Refactor KASLR entropy functions
x86/KASLR: Fix boot crash with certain memory configurations
x86/boot/64: Add forgotten end of function marker
x86/KASLR: Allow randomization below the load address
x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G
x86/KASLR: Randomize virtual address separately
x86/KASLR: Clarify identity map interface
x86/boot: Refuse to build with data relocations
x86/KASLR, x86/power: Remove x86 hibernation restrictions
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 16 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 167 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 172 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 12 |
6 files changed, 300 insertions, 72 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 62c0043a5fd5..96d2b847e09e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | |||
37 | 37 | ||
38 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o | 38 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o |
39 | obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o | 39 | obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o |
40 | obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o | ||
40 | 41 | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 99bfb192803f..9a17250bcbe0 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = { | |||
72 | { 0, "User Space" }, | 72 | { 0, "User Space" }, |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | { 0x8000000000000000UL, "Kernel Space" }, | 74 | { 0x8000000000000000UL, "Kernel Space" }, |
75 | { PAGE_OFFSET, "Low Kernel Mapping" }, | 75 | { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, |
76 | { VMALLOC_START, "vmalloc() Area" }, | 76 | { 0/* VMALLOC_START */, "vmalloc() Area" }, |
77 | { VMEMMAP_START, "Vmemmap" }, | 77 | { 0/* VMEMMAP_START */, "Vmemmap" }, |
78 | # ifdef CONFIG_X86_ESPFIX64 | 78 | # ifdef CONFIG_X86_ESPFIX64 |
79 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, | 79 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
80 | # endif | 80 | # endif |
@@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void) | |||
434 | 434 | ||
435 | static int __init pt_dump_init(void) | 435 | static int __init pt_dump_init(void) |
436 | { | 436 | { |
437 | /* | ||
438 | * Various markers are not compile-time constants, so assign them | ||
439 | * here. | ||
440 | */ | ||
441 | #ifdef CONFIG_X86_64 | ||
442 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; | ||
443 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | ||
444 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; | ||
445 | #endif | ||
437 | #ifdef CONFIG_X86_32 | 446 | #ifdef CONFIG_X86_32 |
438 | /* Not a compile-time constant on x86-32 */ | ||
439 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | 447 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
440 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | 448 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; |
441 | # ifdef CONFIG_HIGHMEM | 449 | # ifdef CONFIG_HIGHMEM |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 372aad2b3291..cc82830bc8c4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
19 | #include <asm/microcode.h> | 19 | #include <asm/microcode.h> |
20 | #include <asm/kaslr.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * We need to define the tracepoints somewhere, and tlb.c | 23 | * We need to define the tracepoints somewhere, and tlb.c |
@@ -590,6 +591,9 @@ void __init init_mem_mapping(void) | |||
590 | /* the ISA range is always mapped regardless of memory holes */ | 591 | /* the ISA range is always mapped regardless of memory holes */ |
591 | init_memory_mapping(0, ISA_END_ADDRESS); | 592 | init_memory_mapping(0, ISA_END_ADDRESS); |
592 | 593 | ||
594 | /* Init the trampoline, possibly with KASLR memory offset */ | ||
595 | init_trampoline(); | ||
596 | |||
593 | /* | 597 | /* |
594 | * If the allocation is in bottom-up direction, we setup direct mapping | 598 | * If the allocation is in bottom-up direction, we setup direct mapping |
595 | * in bottom-up, otherwise we setup direct mapping in top-down. | 599 | * in bottom-up, otherwise we setup direct mapping in top-down. |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e14f87057c3f..53cc2256cf23 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -328,22 +328,30 @@ void __init cleanup_highmap(void) | |||
328 | } | 328 | } |
329 | } | 329 | } |
330 | 330 | ||
331 | /* | ||
332 | * Create PTE level page table mapping for physical addresses. | ||
333 | * It returns the last physical address mapped. | ||
334 | */ | ||
331 | static unsigned long __meminit | 335 | static unsigned long __meminit |
332 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | 336 | phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, |
333 | pgprot_t prot) | 337 | pgprot_t prot) |
334 | { | 338 | { |
335 | unsigned long pages = 0, next; | 339 | unsigned long pages = 0, paddr_next; |
336 | unsigned long last_map_addr = end; | 340 | unsigned long paddr_last = paddr_end; |
341 | pte_t *pte; | ||
337 | int i; | 342 | int i; |
338 | 343 | ||
339 | pte_t *pte = pte_page + pte_index(addr); | 344 | pte = pte_page + pte_index(paddr); |
345 | i = pte_index(paddr); | ||
340 | 346 | ||
341 | for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { | 347 | for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) { |
342 | next = (addr & PAGE_MASK) + PAGE_SIZE; | 348 | paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE; |
343 | if (addr >= end) { | 349 | if (paddr >= paddr_end) { |
344 | if (!after_bootmem && | 350 | if (!after_bootmem && |
345 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && | 351 | !e820_any_mapped(paddr & PAGE_MASK, paddr_next, |
346 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) | 352 | E820_RAM) && |
353 | !e820_any_mapped(paddr & PAGE_MASK, paddr_next, | ||
354 | E820_RESERVED_KERN)) | ||
347 | set_pte(pte, __pte(0)); | 355 | set_pte(pte, __pte(0)); |
348 | continue; | 356 | continue; |
349 | } | 357 | } |
@@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | |||
361 | } | 369 | } |
362 | 370 | ||
363 | if (0) | 371 | if (0) |
364 | printk(" pte=%p addr=%lx pte=%016lx\n", | 372 | pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, |
365 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); | 373 | pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
366 | pages++; | 374 | pages++; |
367 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); | 375 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); |
368 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | 376 | paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; |
369 | } | 377 | } |
370 | 378 | ||
371 | update_page_count(PG_LEVEL_4K, pages); | 379 | update_page_count(PG_LEVEL_4K, pages); |
372 | 380 | ||
373 | return last_map_addr; | 381 | return paddr_last; |
374 | } | 382 | } |
375 | 383 | ||
384 | /* | ||
385 | * Create PMD level page table mapping for physical addresses. The virtual | ||
386 | * and physical address have to be aligned at this level. | ||
387 | * It returns the last physical address mapped. | ||
388 | */ | ||
376 | static unsigned long __meminit | 389 | static unsigned long __meminit |
377 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | 390 | phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, |
378 | unsigned long page_size_mask, pgprot_t prot) | 391 | unsigned long page_size_mask, pgprot_t prot) |
379 | { | 392 | { |
380 | unsigned long pages = 0, next; | 393 | unsigned long pages = 0, paddr_next; |
381 | unsigned long last_map_addr = end; | 394 | unsigned long paddr_last = paddr_end; |
382 | 395 | ||
383 | int i = pmd_index(address); | 396 | int i = pmd_index(paddr); |
384 | 397 | ||
385 | for (; i < PTRS_PER_PMD; i++, address = next) { | 398 | for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) { |
386 | pmd_t *pmd = pmd_page + pmd_index(address); | 399 | pmd_t *pmd = pmd_page + pmd_index(paddr); |
387 | pte_t *pte; | 400 | pte_t *pte; |
388 | pgprot_t new_prot = prot; | 401 | pgprot_t new_prot = prot; |
389 | 402 | ||
390 | next = (address & PMD_MASK) + PMD_SIZE; | 403 | paddr_next = (paddr & PMD_MASK) + PMD_SIZE; |
391 | if (address >= end) { | 404 | if (paddr >= paddr_end) { |
392 | if (!after_bootmem && | 405 | if (!after_bootmem && |
393 | !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && | 406 | !e820_any_mapped(paddr & PMD_MASK, paddr_next, |
394 | !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) | 407 | E820_RAM) && |
408 | !e820_any_mapped(paddr & PMD_MASK, paddr_next, | ||
409 | E820_RESERVED_KERN)) | ||
395 | set_pmd(pmd, __pmd(0)); | 410 | set_pmd(pmd, __pmd(0)); |
396 | continue; | 411 | continue; |
397 | } | 412 | } |
@@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
400 | if (!pmd_large(*pmd)) { | 415 | if (!pmd_large(*pmd)) { |
401 | spin_lock(&init_mm.page_table_lock); | 416 | spin_lock(&init_mm.page_table_lock); |
402 | pte = (pte_t *)pmd_page_vaddr(*pmd); | 417 | pte = (pte_t *)pmd_page_vaddr(*pmd); |
403 | last_map_addr = phys_pte_init(pte, address, | 418 | paddr_last = phys_pte_init(pte, paddr, |
404 | end, prot); | 419 | paddr_end, prot); |
405 | spin_unlock(&init_mm.page_table_lock); | 420 | spin_unlock(&init_mm.page_table_lock); |
406 | continue; | 421 | continue; |
407 | } | 422 | } |
@@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
420 | if (page_size_mask & (1 << PG_LEVEL_2M)) { | 435 | if (page_size_mask & (1 << PG_LEVEL_2M)) { |
421 | if (!after_bootmem) | 436 | if (!after_bootmem) |
422 | pages++; | 437 | pages++; |
423 | last_map_addr = next; | 438 | paddr_last = paddr_next; |
424 | continue; | 439 | continue; |
425 | } | 440 | } |
426 | new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); | 441 | new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); |
@@ -430,42 +445,54 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
430 | pages++; | 445 | pages++; |
431 | spin_lock(&init_mm.page_table_lock); | 446 | spin_lock(&init_mm.page_table_lock); |
432 | set_pte((pte_t *)pmd, | 447 | set_pte((pte_t *)pmd, |
433 | pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, | 448 | pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, |
434 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | 449 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
435 | spin_unlock(&init_mm.page_table_lock); | 450 | spin_unlock(&init_mm.page_table_lock); |
436 | last_map_addr = next; | 451 | paddr_last = paddr_next; |
437 | continue; | 452 | continue; |
438 | } | 453 | } |
439 | 454 | ||
440 | pte = alloc_low_page(); | 455 | pte = alloc_low_page(); |
441 | last_map_addr = phys_pte_init(pte, address, end, new_prot); | 456 | paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); |
442 | 457 | ||
443 | spin_lock(&init_mm.page_table_lock); | 458 | spin_lock(&init_mm.page_table_lock); |
444 | pmd_populate_kernel(&init_mm, pmd, pte); | 459 | pmd_populate_kernel(&init_mm, pmd, pte); |
445 | spin_unlock(&init_mm.page_table_lock); | 460 | spin_unlock(&init_mm.page_table_lock); |
446 | } | 461 | } |
447 | update_page_count(PG_LEVEL_2M, pages); | 462 | update_page_count(PG_LEVEL_2M, pages); |
448 | return last_map_addr; | 463 | return paddr_last; |
449 | } | 464 | } |
450 | 465 | ||
466 | /* | ||
467 | * Create PUD level page table mapping for physical addresses. The virtual | ||
468 | * and physical address do not have to be aligned at this level. KASLR can | ||
469 | * randomize virtual addresses up to this level. | ||
470 | * It returns the last physical address mapped. | ||
471 | */ | ||
451 | static unsigned long __meminit | 472 | static unsigned long __meminit |
452 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | 473 | phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, |
453 | unsigned long page_size_mask) | 474 | unsigned long page_size_mask) |
454 | { | 475 | { |
455 | unsigned long pages = 0, next; | 476 | unsigned long pages = 0, paddr_next; |
456 | unsigned long last_map_addr = end; | 477 | unsigned long paddr_last = paddr_end; |
457 | int i = pud_index(addr); | 478 | unsigned long vaddr = (unsigned long)__va(paddr); |
479 | int i = pud_index(vaddr); | ||
458 | 480 | ||
459 | for (; i < PTRS_PER_PUD; i++, addr = next) { | 481 | for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { |
460 | pud_t *pud = pud_page + pud_index(addr); | 482 | pud_t *pud; |
461 | pmd_t *pmd; | 483 | pmd_t *pmd; |
462 | pgprot_t prot = PAGE_KERNEL; | 484 | pgprot_t prot = PAGE_KERNEL; |
463 | 485 | ||
464 | next = (addr & PUD_MASK) + PUD_SIZE; | 486 | vaddr = (unsigned long)__va(paddr); |
465 | if (addr >= end) { | 487 | pud = pud_page + pud_index(vaddr); |
488 | paddr_next = (paddr & PUD_MASK) + PUD_SIZE; | ||
489 | |||
490 | if (paddr >= paddr_end) { | ||
466 | if (!after_bootmem && | 491 | if (!after_bootmem && |
467 | !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && | 492 | !e820_any_mapped(paddr & PUD_MASK, paddr_next, |
468 | !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) | 493 | E820_RAM) && |
494 | !e820_any_mapped(paddr & PUD_MASK, paddr_next, | ||
495 | E820_RESERVED_KERN)) | ||
469 | set_pud(pud, __pud(0)); | 496 | set_pud(pud, __pud(0)); |
470 | continue; | 497 | continue; |
471 | } | 498 | } |
@@ -473,8 +500,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
473 | if (!pud_none(*pud)) { | 500 | if (!pud_none(*pud)) { |
474 | if (!pud_large(*pud)) { | 501 | if (!pud_large(*pud)) { |
475 | pmd = pmd_offset(pud, 0); | 502 | pmd = pmd_offset(pud, 0); |
476 | last_map_addr = phys_pmd_init(pmd, addr, end, | 503 | paddr_last = phys_pmd_init(pmd, paddr, |
477 | page_size_mask, prot); | 504 | paddr_end, |
505 | page_size_mask, | ||
506 | prot); | ||
478 | __flush_tlb_all(); | 507 | __flush_tlb_all(); |
479 | continue; | 508 | continue; |
480 | } | 509 | } |
@@ -493,7 +522,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
493 | if (page_size_mask & (1 << PG_LEVEL_1G)) { | 522 | if (page_size_mask & (1 << PG_LEVEL_1G)) { |
494 | if (!after_bootmem) | 523 | if (!after_bootmem) |
495 | pages++; | 524 | pages++; |
496 | last_map_addr = next; | 525 | paddr_last = paddr_next; |
497 | continue; | 526 | continue; |
498 | } | 527 | } |
499 | prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); | 528 | prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); |
@@ -503,16 +532,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
503 | pages++; | 532 | pages++; |
504 | spin_lock(&init_mm.page_table_lock); | 533 | spin_lock(&init_mm.page_table_lock); |
505 | set_pte((pte_t *)pud, | 534 | set_pte((pte_t *)pud, |
506 | pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, | 535 | pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, |
507 | PAGE_KERNEL_LARGE)); | 536 | PAGE_KERNEL_LARGE)); |
508 | spin_unlock(&init_mm.page_table_lock); | 537 | spin_unlock(&init_mm.page_table_lock); |
509 | last_map_addr = next; | 538 | paddr_last = paddr_next; |
510 | continue; | 539 | continue; |
511 | } | 540 | } |
512 | 541 | ||
513 | pmd = alloc_low_page(); | 542 | pmd = alloc_low_page(); |
514 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, | 543 | paddr_last = phys_pmd_init(pmd, paddr, paddr_end, |
515 | prot); | 544 | page_size_mask, prot); |
516 | 545 | ||
517 | spin_lock(&init_mm.page_table_lock); | 546 | spin_lock(&init_mm.page_table_lock); |
518 | pud_populate(&init_mm, pud, pmd); | 547 | pud_populate(&init_mm, pud, pmd); |
@@ -522,38 +551,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
522 | 551 | ||
523 | update_page_count(PG_LEVEL_1G, pages); | 552 | update_page_count(PG_LEVEL_1G, pages); |
524 | 553 | ||
525 | return last_map_addr; | 554 | return paddr_last; |
526 | } | 555 | } |
527 | 556 | ||
557 | /* | ||
558 | * Create page table mapping for the physical memory for specific physical | ||
559 | * addresses. The virtual and physical addresses have to be aligned on PMD level | ||
560 | * down. It returns the last physical address mapped. | ||
561 | */ | ||
528 | unsigned long __meminit | 562 | unsigned long __meminit |
529 | kernel_physical_mapping_init(unsigned long start, | 563 | kernel_physical_mapping_init(unsigned long paddr_start, |
530 | unsigned long end, | 564 | unsigned long paddr_end, |
531 | unsigned long page_size_mask) | 565 | unsigned long page_size_mask) |
532 | { | 566 | { |
533 | bool pgd_changed = false; | 567 | bool pgd_changed = false; |
534 | unsigned long next, last_map_addr = end; | 568 | unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; |
535 | unsigned long addr; | ||
536 | 569 | ||
537 | start = (unsigned long)__va(start); | 570 | paddr_last = paddr_end; |
538 | end = (unsigned long)__va(end); | 571 | vaddr = (unsigned long)__va(paddr_start); |
539 | addr = start; | 572 | vaddr_end = (unsigned long)__va(paddr_end); |
573 | vaddr_start = vaddr; | ||
540 | 574 | ||
541 | for (; start < end; start = next) { | 575 | for (; vaddr < vaddr_end; vaddr = vaddr_next) { |
542 | pgd_t *pgd = pgd_offset_k(start); | 576 | pgd_t *pgd = pgd_offset_k(vaddr); |
543 | pud_t *pud; | 577 | pud_t *pud; |
544 | 578 | ||
545 | next = (start & PGDIR_MASK) + PGDIR_SIZE; | 579 | vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; |
546 | 580 | ||
547 | if (pgd_val(*pgd)) { | 581 | if (pgd_val(*pgd)) { |
548 | pud = (pud_t *)pgd_page_vaddr(*pgd); | 582 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
549 | last_map_addr = phys_pud_init(pud, __pa(start), | 583 | paddr_last = phys_pud_init(pud, __pa(vaddr), |
550 | __pa(end), page_size_mask); | 584 | __pa(vaddr_end), |
585 | page_size_mask); | ||
551 | continue; | 586 | continue; |
552 | } | 587 | } |
553 | 588 | ||
554 | pud = alloc_low_page(); | 589 | pud = alloc_low_page(); |
555 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), | 590 | paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), |
556 | page_size_mask); | 591 | page_size_mask); |
557 | 592 | ||
558 | spin_lock(&init_mm.page_table_lock); | 593 | spin_lock(&init_mm.page_table_lock); |
559 | pgd_populate(&init_mm, pgd, pud); | 594 | pgd_populate(&init_mm, pgd, pud); |
@@ -562,11 +597,11 @@ kernel_physical_mapping_init(unsigned long start, | |||
562 | } | 597 | } |
563 | 598 | ||
564 | if (pgd_changed) | 599 | if (pgd_changed) |
565 | sync_global_pgds(addr, end - 1, 0); | 600 | sync_global_pgds(vaddr_start, vaddr_end - 1, 0); |
566 | 601 | ||
567 | __flush_tlb_all(); | 602 | __flush_tlb_all(); |
568 | 603 | ||
569 | return last_map_addr; | 604 | return paddr_last; |
570 | } | 605 | } |
571 | 606 | ||
572 | #ifndef CONFIG_NUMA | 607 | #ifndef CONFIG_NUMA |
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c new file mode 100644 index 000000000000..26dccd6c0df1 --- /dev/null +++ b/arch/x86/mm/kaslr.c | |||
@@ -0,0 +1,172 @@ | |||
1 | /* | ||
2 | * This file implements KASLR memory randomization for x86_64. It randomizes | ||
3 | * the virtual address space of kernel memory regions (physical memory | ||
4 | * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates | ||
5 | * exploits relying on predictable kernel addresses. | ||
6 | * | ||
7 | * Entropy is generated using the KASLR early boot functions now shared in | ||
8 | * the lib directory (originally written by Kees Cook). Randomization is | ||
9 | * done on PGD & PUD page table levels to increase possible addresses. The | ||
10 | * physical memory mapping code was adapted to support PUD level virtual | ||
11 | * addresses. This implementation on the best configuration provides 30,000 | ||
12 | * possible virtual addresses in average for each memory region. An additional | ||
13 | * low memory page is used to ensure each CPU can start with a PGD aligned | ||
14 | * virtual address (for realmode). | ||
15 | * | ||
16 | * The order of each memory region is not changed. The feature looks at | ||
17 | * the available space for the regions based on different configuration | ||
18 | * options and randomizes the base and space between each. The size of the | ||
19 | * physical memory mapping is the available physical memory. | ||
20 | */ | ||
21 | |||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/random.h> | ||
25 | |||
26 | #include <asm/pgalloc.h> | ||
27 | #include <asm/pgtable.h> | ||
28 | #include <asm/setup.h> | ||
29 | #include <asm/kaslr.h> | ||
30 | |||
31 | #include "mm_internal.h" | ||
32 | |||
33 | #define TB_SHIFT 40 | ||
34 | |||
35 | /* | ||
36 | * Virtual address start and end range for randomization. The end changes based | ||
37 | * on configuration to have the highest amount of space for randomization. | ||
38 | * It increases the possible random position for each randomized region. | ||
39 | * | ||
40 | * You need to add an if/def entry if you introduce a new memory region | ||
41 | * compatible with KASLR. Your entry must be in logical order with memory | ||
42 | * layout. For example, ESPFIX is before EFI because its virtual address is | ||
43 | * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to | ||
44 | * ensure that this order is correct and won't be changed. | ||
45 | */ | ||
46 | static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; | ||
47 | static const unsigned long vaddr_end = VMEMMAP_START; | ||
48 | |||
49 | /* Default values */ | ||
50 | unsigned long page_offset_base = __PAGE_OFFSET_BASE; | ||
51 | EXPORT_SYMBOL(page_offset_base); | ||
52 | unsigned long vmalloc_base = __VMALLOC_BASE; | ||
53 | EXPORT_SYMBOL(vmalloc_base); | ||
54 | |||
55 | /* | ||
56 | * Memory regions randomized by KASLR (except modules that use a separate logic | ||
57 | * earlier during boot). The list is ordered based on virtual addresses. This | ||
58 | * order is kept after randomization. | ||
59 | */ | ||
60 | static __initdata struct kaslr_memory_region { | ||
61 | unsigned long *base; | ||
62 | unsigned long size_tb; | ||
63 | } kaslr_regions[] = { | ||
64 | { &page_offset_base, 64/* Maximum */ }, | ||
65 | { &vmalloc_base, VMALLOC_SIZE_TB }, | ||
66 | }; | ||
67 | |||
68 | /* Get size in bytes used by the memory region */ | ||
69 | static inline unsigned long get_padding(struct kaslr_memory_region *region) | ||
70 | { | ||
71 | return (region->size_tb << TB_SHIFT); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Apply no randomization if KASLR was disabled at boot or if KASAN | ||
76 | * is enabled. KASAN shadow mappings rely on regions being PGD aligned. | ||
77 | */ | ||
78 | static inline bool kaslr_memory_enabled(void) | ||
79 | { | ||
80 | return kaslr_enabled() && !config_enabled(CONFIG_KASAN); | ||
81 | } | ||
82 | |||
83 | /* Initialize base and padding for each memory region randomized with KASLR */ | ||
84 | void __init kernel_randomize_memory(void) | ||
85 | { | ||
86 | size_t i; | ||
87 | unsigned long vaddr = vaddr_start; | ||
88 | unsigned long rand, memory_tb; | ||
89 | struct rnd_state rand_state; | ||
90 | unsigned long remain_entropy; | ||
91 | |||
92 | if (!kaslr_memory_enabled()) | ||
93 | return; | ||
94 | |||
95 | /* | ||
96 | * Update Physical memory mapping to available and | ||
97 | * add padding if needed (especially for memory hotplug support). | ||
98 | */ | ||
99 | BUG_ON(kaslr_regions[0].base != &page_offset_base); | ||
100 | memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) + | ||
101 | CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; | ||
102 | |||
103 | /* Adapt physical memory region size based on available memory */ | ||
104 | if (memory_tb < kaslr_regions[0].size_tb) | ||
105 | kaslr_regions[0].size_tb = memory_tb; | ||
106 | |||
107 | /* Calculate entropy available between regions */ | ||
108 | remain_entropy = vaddr_end - vaddr_start; | ||
109 | for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) | ||
110 | remain_entropy -= get_padding(&kaslr_regions[i]); | ||
111 | |||
112 | prandom_seed_state(&rand_state, kaslr_get_random_long("Memory")); | ||
113 | |||
114 | for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) { | ||
115 | unsigned long entropy; | ||
116 | |||
117 | /* | ||
118 | * Select a random virtual address using the extra entropy | ||
119 | * available. | ||
120 | */ | ||
121 | entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); | ||
122 | prandom_bytes_state(&rand_state, &rand, sizeof(rand)); | ||
123 | entropy = (rand % (entropy + 1)) & PUD_MASK; | ||
124 | vaddr += entropy; | ||
125 | *kaslr_regions[i].base = vaddr; | ||
126 | |||
127 | /* | ||
128 | * Jump the region and add a minimum padding based on | ||
129 | * randomization alignment. | ||
130 | */ | ||
131 | vaddr += get_padding(&kaslr_regions[i]); | ||
132 | vaddr = round_up(vaddr + 1, PUD_SIZE); | ||
133 | remain_entropy -= entropy; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * Create PGD aligned trampoline table to allow real mode initialization | ||
139 | * of additional CPUs. Consume only 1 low memory page. | ||
140 | */ | ||
141 | void __meminit init_trampoline(void) | ||
142 | { | ||
143 | unsigned long paddr, paddr_next; | ||
144 | pgd_t *pgd; | ||
145 | pud_t *pud_page, *pud_page_tramp; | ||
146 | int i; | ||
147 | |||
148 | if (!kaslr_memory_enabled()) { | ||
149 | init_trampoline_default(); | ||
150 | return; | ||
151 | } | ||
152 | |||
153 | pud_page_tramp = alloc_low_page(); | ||
154 | |||
155 | paddr = 0; | ||
156 | pgd = pgd_offset_k((unsigned long)__va(paddr)); | ||
157 | pud_page = (pud_t *) pgd_page_vaddr(*pgd); | ||
158 | |||
159 | for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { | ||
160 | pud_t *pud, *pud_tramp; | ||
161 | unsigned long vaddr = (unsigned long)__va(paddr); | ||
162 | |||
163 | pud_tramp = pud_page_tramp + pud_index(paddr); | ||
164 | pud = pud_page + pud_index(vaddr); | ||
165 | paddr_next = (paddr & PUD_MASK) + PUD_SIZE; | ||
166 | |||
167 | *pud_tramp = *pud; | ||
168 | } | ||
169 | |||
170 | set_pgd(&trampoline_pgd_entry, | ||
171 | __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); | ||
172 | } | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 47870a534877..849dc09fa4f0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void) | |||
101 | 101 | ||
102 | static inline unsigned long highmap_end_pfn(void) | 102 | static inline unsigned long highmap_end_pfn(void) |
103 | { | 103 | { |
104 | return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; | 104 | /* Do not reference physical address outside the kernel. */ |
105 | return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; | ||
105 | } | 106 | } |
106 | 107 | ||
107 | #endif | 108 | #endif |
@@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end) | |||
112 | return addr >= start && addr < end; | 113 | return addr >= start && addr < end; |
113 | } | 114 | } |
114 | 115 | ||
116 | static inline int | ||
117 | within_inclusive(unsigned long addr, unsigned long start, unsigned long end) | ||
118 | { | ||
119 | return addr >= start && addr <= end; | ||
120 | } | ||
121 | |||
115 | /* | 122 | /* |
116 | * Flushing functions | 123 | * Flushing functions |
117 | */ | 124 | */ |
@@ -1299,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
1299 | * to touch the high mapped kernel as well: | 1306 | * to touch the high mapped kernel as well: |
1300 | */ | 1307 | */ |
1301 | if (!within(vaddr, (unsigned long)_text, _brk_end) && | 1308 | if (!within(vaddr, (unsigned long)_text, _brk_end) && |
1302 | within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { | 1309 | within_inclusive(cpa->pfn, highmap_start_pfn(), |
1310 | highmap_end_pfn())) { | ||
1303 | unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + | 1311 | unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + |
1304 | __START_KERNEL_map - phys_base; | 1312 | __START_KERNEL_map - phys_base; |
1305 | alias_cpa = *cpa; | 1313 | alias_cpa = *cpa; |