aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 20:32:28 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 20:32:28 -0400
commit77cd3d0c43b7e6c0bb49ca641cf936891f6e1766 (patch)
tree44885bb7ec9def5a34b8e9f2073a166f78bddcf9 /arch/x86/mm
parent0f657262d5f99ad86b9a63fb5dcd29036c2ed916 (diff)
parent6a79296cb15d947bcb4558011fe066e5d8252b35 (diff)
Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot updates from Ingo Molnar: "The main changes: - add initial commits to randomize kernel memory section virtual addresses, enabled via a new kernel option: RANDOMIZE_MEMORY (Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu) - enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees Cook) - EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar) - misc cleanups/fixes" * 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/boot: Simplify EBDA-vs-BIOS reservation logic x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does x86/boot: Reorganize and clean up the BIOS area reservation code x86/mm: Do not reference phys addr beyond kernel x86/mm: Add memory hotplug support for KASLR memory randomization x86/mm: Enable KASLR for vmalloc memory regions x86/mm: Enable KASLR for physical mapping memory regions x86/mm: Implement ASLR for kernel memory regions x86/mm: Separate variable for trampoline PGD x86/mm: Add PUD VA support for physical mapping x86/mm: Update physical mapping variable names x86/mm: Refactor KASLR entropy functions x86/KASLR: Fix boot crash with certain memory configurations x86/boot/64: Add forgotten end of function marker x86/KASLR: Allow randomization below the load address x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G x86/KASLR: Randomize virtual address separately x86/KASLR: Clarify identity map interface x86/boot: Refuse to build with data relocations x86/KASLR, x86/power: Remove x86 hibernation restrictions
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/dump_pagetables.c16
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/init_64.c167
-rw-r--r--arch/x86/mm/kaslr.c172
-rw-r--r--arch/x86/mm/pageattr.c12
6 files changed, 300 insertions, 72 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 62c0043a5fd5..96d2b847e09e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
37 37
38obj-$(CONFIG_X86_INTEL_MPX) += mpx.o 38obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
39obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o 39obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
40obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
40 41
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 99bfb192803f..9a17250bcbe0 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = {
72 { 0, "User Space" }, 72 { 0, "User Space" },
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 { 0x8000000000000000UL, "Kernel Space" }, 74 { 0x8000000000000000UL, "Kernel Space" },
75 { PAGE_OFFSET, "Low Kernel Mapping" }, 75 { 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
76 { VMALLOC_START, "vmalloc() Area" }, 76 { 0/* VMALLOC_START */, "vmalloc() Area" },
77 { VMEMMAP_START, "Vmemmap" }, 77 { 0/* VMEMMAP_START */, "Vmemmap" },
78# ifdef CONFIG_X86_ESPFIX64 78# ifdef CONFIG_X86_ESPFIX64
79 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 79 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
80# endif 80# endif
@@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void)
434 434
435static int __init pt_dump_init(void) 435static int __init pt_dump_init(void)
436{ 436{
437 /*
438 * Various markers are not compile-time constants, so assign them
439 * here.
440 */
441#ifdef CONFIG_X86_64
442 address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
443 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
444 address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
445#endif
437#ifdef CONFIG_X86_32 446#ifdef CONFIG_X86_32
438 /* Not a compile-time constant on x86-32 */
439 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; 447 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
440 address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; 448 address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
441# ifdef CONFIG_HIGHMEM 449# ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 372aad2b3291..cc82830bc8c4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -17,6 +17,7 @@
17#include <asm/proto.h> 17#include <asm/proto.h>
18#include <asm/dma.h> /* for MAX_DMA_PFN */ 18#include <asm/dma.h> /* for MAX_DMA_PFN */
19#include <asm/microcode.h> 19#include <asm/microcode.h>
20#include <asm/kaslr.h>
20 21
21/* 22/*
22 * We need to define the tracepoints somewhere, and tlb.c 23 * We need to define the tracepoints somewhere, and tlb.c
@@ -590,6 +591,9 @@ void __init init_mem_mapping(void)
590 /* the ISA range is always mapped regardless of memory holes */ 591 /* the ISA range is always mapped regardless of memory holes */
591 init_memory_mapping(0, ISA_END_ADDRESS); 592 init_memory_mapping(0, ISA_END_ADDRESS);
592 593
594 /* Init the trampoline, possibly with KASLR memory offset */
595 init_trampoline();
596
593 /* 597 /*
594 * If the allocation is in bottom-up direction, we setup direct mapping 598 * If the allocation is in bottom-up direction, we setup direct mapping
595 * in bottom-up, otherwise we setup direct mapping in top-down. 599 * in bottom-up, otherwise we setup direct mapping in top-down.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e14f87057c3f..53cc2256cf23 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -328,22 +328,30 @@ void __init cleanup_highmap(void)
328 } 328 }
329} 329}
330 330
331/*
332 * Create PTE level page table mapping for physical addresses.
333 * It returns the last physical address mapped.
334 */
331static unsigned long __meminit 335static unsigned long __meminit
332phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, 336phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
333 pgprot_t prot) 337 pgprot_t prot)
334{ 338{
335 unsigned long pages = 0, next; 339 unsigned long pages = 0, paddr_next;
336 unsigned long last_map_addr = end; 340 unsigned long paddr_last = paddr_end;
341 pte_t *pte;
337 int i; 342 int i;
338 343
339 pte_t *pte = pte_page + pte_index(addr); 344 pte = pte_page + pte_index(paddr);
345 i = pte_index(paddr);
340 346
341 for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { 347 for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
342 next = (addr & PAGE_MASK) + PAGE_SIZE; 348 paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
343 if (addr >= end) { 349 if (paddr >= paddr_end) {
344 if (!after_bootmem && 350 if (!after_bootmem &&
345 !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && 351 !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
346 !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) 352 E820_RAM) &&
353 !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
354 E820_RESERVED_KERN))
347 set_pte(pte, __pte(0)); 355 set_pte(pte, __pte(0));
348 continue; 356 continue;
349 } 357 }
@@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
361 } 369 }
362 370
363 if (0) 371 if (0)
364 printk(" pte=%p addr=%lx pte=%016lx\n", 372 pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
365 pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); 373 pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
366 pages++; 374 pages++;
367 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); 375 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
368 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; 376 paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
369 } 377 }
370 378
371 update_page_count(PG_LEVEL_4K, pages); 379 update_page_count(PG_LEVEL_4K, pages);
372 380
373 return last_map_addr; 381 return paddr_last;
374} 382}
375 383
384/*
385 * Create PMD level page table mapping for physical addresses. The virtual
386 * and physical address have to be aligned at this level.
387 * It returns the last physical address mapped.
388 */
376static unsigned long __meminit 389static unsigned long __meminit
377phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 390phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
378 unsigned long page_size_mask, pgprot_t prot) 391 unsigned long page_size_mask, pgprot_t prot)
379{ 392{
380 unsigned long pages = 0, next; 393 unsigned long pages = 0, paddr_next;
381 unsigned long last_map_addr = end; 394 unsigned long paddr_last = paddr_end;
382 395
383 int i = pmd_index(address); 396 int i = pmd_index(paddr);
384 397
385 for (; i < PTRS_PER_PMD; i++, address = next) { 398 for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
386 pmd_t *pmd = pmd_page + pmd_index(address); 399 pmd_t *pmd = pmd_page + pmd_index(paddr);
387 pte_t *pte; 400 pte_t *pte;
388 pgprot_t new_prot = prot; 401 pgprot_t new_prot = prot;
389 402
390 next = (address & PMD_MASK) + PMD_SIZE; 403 paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
391 if (address >= end) { 404 if (paddr >= paddr_end) {
392 if (!after_bootmem && 405 if (!after_bootmem &&
393 !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && 406 !e820_any_mapped(paddr & PMD_MASK, paddr_next,
394 !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) 407 E820_RAM) &&
408 !e820_any_mapped(paddr & PMD_MASK, paddr_next,
409 E820_RESERVED_KERN))
395 set_pmd(pmd, __pmd(0)); 410 set_pmd(pmd, __pmd(0));
396 continue; 411 continue;
397 } 412 }
@@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
400 if (!pmd_large(*pmd)) { 415 if (!pmd_large(*pmd)) {
401 spin_lock(&init_mm.page_table_lock); 416 spin_lock(&init_mm.page_table_lock);
402 pte = (pte_t *)pmd_page_vaddr(*pmd); 417 pte = (pte_t *)pmd_page_vaddr(*pmd);
403 last_map_addr = phys_pte_init(pte, address, 418 paddr_last = phys_pte_init(pte, paddr,
404 end, prot); 419 paddr_end, prot);
405 spin_unlock(&init_mm.page_table_lock); 420 spin_unlock(&init_mm.page_table_lock);
406 continue; 421 continue;
407 } 422 }
@@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
420 if (page_size_mask & (1 << PG_LEVEL_2M)) { 435 if (page_size_mask & (1 << PG_LEVEL_2M)) {
421 if (!after_bootmem) 436 if (!after_bootmem)
422 pages++; 437 pages++;
423 last_map_addr = next; 438 paddr_last = paddr_next;
424 continue; 439 continue;
425 } 440 }
426 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); 441 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -430,42 +445,54 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
430 pages++; 445 pages++;
431 spin_lock(&init_mm.page_table_lock); 446 spin_lock(&init_mm.page_table_lock);
432 set_pte((pte_t *)pmd, 447 set_pte((pte_t *)pmd,
433 pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, 448 pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
434 __pgprot(pgprot_val(prot) | _PAGE_PSE))); 449 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
435 spin_unlock(&init_mm.page_table_lock); 450 spin_unlock(&init_mm.page_table_lock);
436 last_map_addr = next; 451 paddr_last = paddr_next;
437 continue; 452 continue;
438 } 453 }
439 454
440 pte = alloc_low_page(); 455 pte = alloc_low_page();
441 last_map_addr = phys_pte_init(pte, address, end, new_prot); 456 paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
442 457
443 spin_lock(&init_mm.page_table_lock); 458 spin_lock(&init_mm.page_table_lock);
444 pmd_populate_kernel(&init_mm, pmd, pte); 459 pmd_populate_kernel(&init_mm, pmd, pte);
445 spin_unlock(&init_mm.page_table_lock); 460 spin_unlock(&init_mm.page_table_lock);
446 } 461 }
447 update_page_count(PG_LEVEL_2M, pages); 462 update_page_count(PG_LEVEL_2M, pages);
448 return last_map_addr; 463 return paddr_last;
449} 464}
450 465
466/*
467 * Create PUD level page table mapping for physical addresses. The virtual
468 * and physical address do not have to be aligned at this level. KASLR can
469 * randomize virtual addresses up to this level.
470 * It returns the last physical address mapped.
471 */
451static unsigned long __meminit 472static unsigned long __meminit
452phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, 473phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
453 unsigned long page_size_mask) 474 unsigned long page_size_mask)
454{ 475{
455 unsigned long pages = 0, next; 476 unsigned long pages = 0, paddr_next;
456 unsigned long last_map_addr = end; 477 unsigned long paddr_last = paddr_end;
457 int i = pud_index(addr); 478 unsigned long vaddr = (unsigned long)__va(paddr);
479 int i = pud_index(vaddr);
458 480
459 for (; i < PTRS_PER_PUD; i++, addr = next) { 481 for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
460 pud_t *pud = pud_page + pud_index(addr); 482 pud_t *pud;
461 pmd_t *pmd; 483 pmd_t *pmd;
462 pgprot_t prot = PAGE_KERNEL; 484 pgprot_t prot = PAGE_KERNEL;
463 485
464 next = (addr & PUD_MASK) + PUD_SIZE; 486 vaddr = (unsigned long)__va(paddr);
465 if (addr >= end) { 487 pud = pud_page + pud_index(vaddr);
488 paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
489
490 if (paddr >= paddr_end) {
466 if (!after_bootmem && 491 if (!after_bootmem &&
467 !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && 492 !e820_any_mapped(paddr & PUD_MASK, paddr_next,
468 !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) 493 E820_RAM) &&
494 !e820_any_mapped(paddr & PUD_MASK, paddr_next,
495 E820_RESERVED_KERN))
469 set_pud(pud, __pud(0)); 496 set_pud(pud, __pud(0));
470 continue; 497 continue;
471 } 498 }
@@ -473,8 +500,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
473 if (!pud_none(*pud)) { 500 if (!pud_none(*pud)) {
474 if (!pud_large(*pud)) { 501 if (!pud_large(*pud)) {
475 pmd = pmd_offset(pud, 0); 502 pmd = pmd_offset(pud, 0);
476 last_map_addr = phys_pmd_init(pmd, addr, end, 503 paddr_last = phys_pmd_init(pmd, paddr,
477 page_size_mask, prot); 504 paddr_end,
505 page_size_mask,
506 prot);
478 __flush_tlb_all(); 507 __flush_tlb_all();
479 continue; 508 continue;
480 } 509 }
@@ -493,7 +522,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
493 if (page_size_mask & (1 << PG_LEVEL_1G)) { 522 if (page_size_mask & (1 << PG_LEVEL_1G)) {
494 if (!after_bootmem) 523 if (!after_bootmem)
495 pages++; 524 pages++;
496 last_map_addr = next; 525 paddr_last = paddr_next;
497 continue; 526 continue;
498 } 527 }
499 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); 528 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -503,16 +532,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
503 pages++; 532 pages++;
504 spin_lock(&init_mm.page_table_lock); 533 spin_lock(&init_mm.page_table_lock);
505 set_pte((pte_t *)pud, 534 set_pte((pte_t *)pud,
506 pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, 535 pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
507 PAGE_KERNEL_LARGE)); 536 PAGE_KERNEL_LARGE));
508 spin_unlock(&init_mm.page_table_lock); 537 spin_unlock(&init_mm.page_table_lock);
509 last_map_addr = next; 538 paddr_last = paddr_next;
510 continue; 539 continue;
511 } 540 }
512 541
513 pmd = alloc_low_page(); 542 pmd = alloc_low_page();
514 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, 543 paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
515 prot); 544 page_size_mask, prot);
516 545
517 spin_lock(&init_mm.page_table_lock); 546 spin_lock(&init_mm.page_table_lock);
518 pud_populate(&init_mm, pud, pmd); 547 pud_populate(&init_mm, pud, pmd);
@@ -522,38 +551,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
522 551
523 update_page_count(PG_LEVEL_1G, pages); 552 update_page_count(PG_LEVEL_1G, pages);
524 553
525 return last_map_addr; 554 return paddr_last;
526} 555}
527 556
557/*
558 * Create page table mapping for the physical memory for specific physical
559 * addresses. The virtual and physical addresses have to be aligned on PMD level
560 * down. It returns the last physical address mapped.
561 */
528unsigned long __meminit 562unsigned long __meminit
529kernel_physical_mapping_init(unsigned long start, 563kernel_physical_mapping_init(unsigned long paddr_start,
530 unsigned long end, 564 unsigned long paddr_end,
531 unsigned long page_size_mask) 565 unsigned long page_size_mask)
532{ 566{
533 bool pgd_changed = false; 567 bool pgd_changed = false;
534 unsigned long next, last_map_addr = end; 568 unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
535 unsigned long addr;
536 569
537 start = (unsigned long)__va(start); 570 paddr_last = paddr_end;
538 end = (unsigned long)__va(end); 571 vaddr = (unsigned long)__va(paddr_start);
539 addr = start; 572 vaddr_end = (unsigned long)__va(paddr_end);
573 vaddr_start = vaddr;
540 574
541 for (; start < end; start = next) { 575 for (; vaddr < vaddr_end; vaddr = vaddr_next) {
542 pgd_t *pgd = pgd_offset_k(start); 576 pgd_t *pgd = pgd_offset_k(vaddr);
543 pud_t *pud; 577 pud_t *pud;
544 578
545 next = (start & PGDIR_MASK) + PGDIR_SIZE; 579 vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
546 580
547 if (pgd_val(*pgd)) { 581 if (pgd_val(*pgd)) {
548 pud = (pud_t *)pgd_page_vaddr(*pgd); 582 pud = (pud_t *)pgd_page_vaddr(*pgd);
549 last_map_addr = phys_pud_init(pud, __pa(start), 583 paddr_last = phys_pud_init(pud, __pa(vaddr),
550 __pa(end), page_size_mask); 584 __pa(vaddr_end),
585 page_size_mask);
551 continue; 586 continue;
552 } 587 }
553 588
554 pud = alloc_low_page(); 589 pud = alloc_low_page();
555 last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), 590 paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
556 page_size_mask); 591 page_size_mask);
557 592
558 spin_lock(&init_mm.page_table_lock); 593 spin_lock(&init_mm.page_table_lock);
559 pgd_populate(&init_mm, pgd, pud); 594 pgd_populate(&init_mm, pgd, pud);
@@ -562,11 +597,11 @@ kernel_physical_mapping_init(unsigned long start,
562 } 597 }
563 598
564 if (pgd_changed) 599 if (pgd_changed)
565 sync_global_pgds(addr, end - 1, 0); 600 sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
566 601
567 __flush_tlb_all(); 602 __flush_tlb_all();
568 603
569 return last_map_addr; 604 return paddr_last;
570} 605}
571 606
572#ifndef CONFIG_NUMA 607#ifndef CONFIG_NUMA
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
new file mode 100644
index 000000000000..26dccd6c0df1
--- /dev/null
+++ b/arch/x86/mm/kaslr.c
@@ -0,0 +1,172 @@
1/*
2 * This file implements KASLR memory randomization for x86_64. It randomizes
3 * the virtual address space of kernel memory regions (physical memory
4 * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
5 * exploits relying on predictable kernel addresses.
6 *
7 * Entropy is generated using the KASLR early boot functions now shared in
8 * the lib directory (originally written by Kees Cook). Randomization is
9 * done on PGD & PUD page table levels to increase possible addresses. The
10 * physical memory mapping code was adapted to support PUD level virtual
11 * addresses. This implementation on the best configuration provides 30,000
 12 * possible virtual addresses on average for each memory region. An additional
13 * low memory page is used to ensure each CPU can start with a PGD aligned
14 * virtual address (for realmode).
15 *
16 * The order of each memory region is not changed. The feature looks at
17 * the available space for the regions based on different configuration
18 * options and randomizes the base and space between each. The size of the
19 * physical memory mapping is the available physical memory.
20 */
21
22#include <linux/kernel.h>
23#include <linux/init.h>
24#include <linux/random.h>
25
26#include <asm/pgalloc.h>
27#include <asm/pgtable.h>
28#include <asm/setup.h>
29#include <asm/kaslr.h>
30
31#include "mm_internal.h"
32
33#define TB_SHIFT 40
34
35/*
 36 * Virtual address start and end range for randomization. The end changes based
37 * on configuration to have the highest amount of space for randomization.
38 * It increases the possible random position for each randomized region.
39 *
40 * You need to add an if/def entry if you introduce a new memory region
41 * compatible with KASLR. Your entry must be in logical order with memory
42 * layout. For example, ESPFIX is before EFI because its virtual address is
43 * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
44 * ensure that this order is correct and won't be changed.
45 */
46static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
47static const unsigned long vaddr_end = VMEMMAP_START;
48
49/* Default values */
50unsigned long page_offset_base = __PAGE_OFFSET_BASE;
51EXPORT_SYMBOL(page_offset_base);
52unsigned long vmalloc_base = __VMALLOC_BASE;
53EXPORT_SYMBOL(vmalloc_base);
54
55/*
56 * Memory regions randomized by KASLR (except modules that use a separate logic
57 * earlier during boot). The list is ordered based on virtual addresses. This
58 * order is kept after randomization.
59 */
60static __initdata struct kaslr_memory_region {
61 unsigned long *base;
62 unsigned long size_tb;
63} kaslr_regions[] = {
64 { &page_offset_base, 64/* Maximum */ },
65 { &vmalloc_base, VMALLOC_SIZE_TB },
66};
67
68/* Get size in bytes used by the memory region */
69static inline unsigned long get_padding(struct kaslr_memory_region *region)
70{
71 return (region->size_tb << TB_SHIFT);
72}
73
74/*
75 * Apply no randomization if KASLR was disabled at boot or if KASAN
76 * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
77 */
78static inline bool kaslr_memory_enabled(void)
79{
80 return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
81}
82
83/* Initialize base and padding for each memory region randomized with KASLR */
84void __init kernel_randomize_memory(void)
85{
86 size_t i;
87 unsigned long vaddr = vaddr_start;
88 unsigned long rand, memory_tb;
89 struct rnd_state rand_state;
90 unsigned long remain_entropy;
91
92 if (!kaslr_memory_enabled())
93 return;
94
95 /*
96 * Update Physical memory mapping to available and
97 * add padding if needed (especially for memory hotplug support).
98 */
99 BUG_ON(kaslr_regions[0].base != &page_offset_base);
100 memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
101 CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
102
 103 /* Adapt physical memory region size based on available memory */
104 if (memory_tb < kaslr_regions[0].size_tb)
105 kaslr_regions[0].size_tb = memory_tb;
106
107 /* Calculate entropy available between regions */
108 remain_entropy = vaddr_end - vaddr_start;
109 for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
110 remain_entropy -= get_padding(&kaslr_regions[i]);
111
112 prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));
113
114 for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
115 unsigned long entropy;
116
117 /*
118 * Select a random virtual address using the extra entropy
119 * available.
120 */
121 entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
122 prandom_bytes_state(&rand_state, &rand, sizeof(rand));
123 entropy = (rand % (entropy + 1)) & PUD_MASK;
124 vaddr += entropy;
125 *kaslr_regions[i].base = vaddr;
126
127 /*
128 * Jump the region and add a minimum padding based on
129 * randomization alignment.
130 */
131 vaddr += get_padding(&kaslr_regions[i]);
132 vaddr = round_up(vaddr + 1, PUD_SIZE);
133 remain_entropy -= entropy;
134 }
135}
136
137/*
138 * Create PGD aligned trampoline table to allow real mode initialization
139 * of additional CPUs. Consume only 1 low memory page.
140 */
141void __meminit init_trampoline(void)
142{
143 unsigned long paddr, paddr_next;
144 pgd_t *pgd;
145 pud_t *pud_page, *pud_page_tramp;
146 int i;
147
148 if (!kaslr_memory_enabled()) {
149 init_trampoline_default();
150 return;
151 }
152
153 pud_page_tramp = alloc_low_page();
154
155 paddr = 0;
156 pgd = pgd_offset_k((unsigned long)__va(paddr));
157 pud_page = (pud_t *) pgd_page_vaddr(*pgd);
158
159 for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
160 pud_t *pud, *pud_tramp;
161 unsigned long vaddr = (unsigned long)__va(paddr);
162
163 pud_tramp = pud_page_tramp + pud_index(paddr);
164 pud = pud_page + pud_index(vaddr);
165 paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
166
167 *pud_tramp = *pud;
168 }
169
170 set_pgd(&trampoline_pgd_entry,
171 __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
172}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 47870a534877..849dc09fa4f0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void)
101 101
102static inline unsigned long highmap_end_pfn(void) 102static inline unsigned long highmap_end_pfn(void)
103{ 103{
104 return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; 104 /* Do not reference physical address outside the kernel. */
105 return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
105} 106}
106 107
107#endif 108#endif
@@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end)
112 return addr >= start && addr < end; 113 return addr >= start && addr < end;
113} 114}
114 115
116static inline int
117within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
118{
119 return addr >= start && addr <= end;
120}
121
115/* 122/*
116 * Flushing functions 123 * Flushing functions
117 */ 124 */
@@ -1299,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
1299 * to touch the high mapped kernel as well: 1306 * to touch the high mapped kernel as well:
1300 */ 1307 */
1301 if (!within(vaddr, (unsigned long)_text, _brk_end) && 1308 if (!within(vaddr, (unsigned long)_text, _brk_end) &&
1302 within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { 1309 within_inclusive(cpa->pfn, highmap_start_pfn(),
1310 highmap_end_pfn())) {
1303 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + 1311 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
1304 __START_KERNEL_map - phys_base; 1312 __START_KERNEL_map - phys_base;
1305 alias_cpa = *cpa; 1313 alias_cpa = *cpa;