author		Linus Torvalds <torvalds@linux-foundation.org>	2018-01-17 15:30:06 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-17 15:30:06 -0500
commit		1d966eb4d6326a2521073174e9710713e9846e8b (patch)
tree		5bb60aa0c8d82cd44116b12cb7a325ff61329da2
parent		9a4ba2ab08a2cefd8fa2b6829219093fbed4df75 (diff)
parent		45d55e7bac4028af93f5fa324e69958a0b868e96 (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc fixes:

   - A rather involved set of memory hardware encryption fixes to
     support the early loading of microcode files via the initrd.
     These are larger than what we normally take at such a late -rc
     stage, but there are two mitigating factors: 1) much of the
     changes are limited to the SME code itself 2) being able to early
     load microcode has increased importance in the post-Meltdown/Spectre
     era.

   - An IRQ vector allocator fix

   - An Intel RDT driver use-after-free fix

   - An APIC driver bug fix/revert to make certain older systems boot
     again

   - A pkeys ABI fix

   - TSC calibration fixes

   - A kdump fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/vector: Fix off by one in error path
  x86/intel_rdt/cqm: Prevent use after free
  x86/mm: Encrypt the initrd earlier for BSP microcode update
  x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
  x86/mm: Centralize PMD flags in sme_encrypt_kernel()
  x86/mm: Use a struct to reduce parameters for SME PGD mapping
  x86/mm: Clean up register saving in the __enc_copy() assembly code
  x86/idt: Mark IDT tables __initconst
  Revert "x86/apic: Remove init_bsp_APIC()"
  x86/mm/pkeys: Fix fill_sig_info_pkey
  x86/tsc: Print tsc_khz, when it differs from cpu_khz
  x86/tsc: Fix erroneous TSC rate on Skylake Xeon
  x86/tsc: Future-proof native_calibrate_tsc()
  kdump: Write the correct address of mem_section into vmcoreinfo
-rw-r--r--	arch/x86/include/asm/apic.h		  1
-rw-r--r--	arch/x86/include/asm/mem_encrypt.h	  4
-rw-r--r--	arch/x86/kernel/apic/apic.c		 49
-rw-r--r--	arch/x86/kernel/apic/vector.c		  7
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt.c		  8
-rw-r--r--	arch/x86/kernel/head64.c		  4
-rw-r--r--	arch/x86/kernel/idt.c			 12
-rw-r--r--	arch/x86/kernel/irqinit.c		  3
-rw-r--r--	arch/x86/kernel/setup.c			 10
-rw-r--r--	arch/x86/kernel/tsc.c			  9
-rw-r--r--	arch/x86/mm/fault.c			  7
-rw-r--r--	arch/x86/mm/mem_encrypt.c		356
-rw-r--r--	arch/x86/mm/mem_encrypt_boot.S		 80
13 files changed, 388 insertions, 162 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
 
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
 
 static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	if (apic_extnmi == APIC_EXTNMI_NONE)
+		value |= APIC_LVT_MASKED;
+	apic_write(APIC_LVT1, value);
+}
+
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 
 		err = assign_irq_vector_policy(irqd, info);
 		trace_vector_setup(virq + i, false, err);
-		if (err)
+		if (err) {
+			irqd->chip_data = NULL;
+			free_apic_chip_data(apicd);
 			goto error;
+		}
 	}
 
 	return 0;
 
 error:
-	x86_vector_free_irqs(domain, virq, i + 1);
+	x86_vector_free_irqs(domain, virq, i);
 	return err;
 }
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		kfree(d->ctrl_val);
-		kfree(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			cancel_delayed_work(&d->cqm_limbo);
 		}
 
+		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		kfree(d);
 		return;
 	}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p = fixup_pointer(&phys_base, physaddr);
 	*p += load_delta - sme_get_me_mask();
 
-	/* Encrypt the kernel (if SME is active) */
-	sme_encrypt_kernel();
+	/* Encrypt the kernel and related (if SME is active) */
+	sme_encrypt_kernel(bp);
 
 	/*
 	 * Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
 	INTG(X86_TRAP_DB,		debug),
 	SYSG(X86_TRAP_BP,		int3),
 #ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
  * the traps which use them are reinitialized with IST after cpu_init() has
  * set up TSS.
  */
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
 	INTG(X86_TRAP_DE,		divide_error),
 	INTG(X86_TRAP_NMI,		nmi),
 	INTG(X86_TRAP_BR,		bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
 /*
  * The APIC and SMP idt entries
  */
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_SMP
 	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
 	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
 	INTG(X86_TRAP_PF,		page_fault),
 };
 
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
 	INTG(X86_TRAP_DB,	debug),
 	INTG(X86_TRAP_BP,	int3),
 };
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
  * The exceptions which use Interrupt stacks. They are setup after
  * cpu_init() when the TSS has been initialized.
  */
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
 	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
 	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
 	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
 	struct irq_chip *chip = legacy_pic->chip;
 	int i;
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
-	/*
-	 * If SME is active, this memory will be marked encrypted by the
-	 * kernel when it is accessed (including relocation). However, the
-	 * ramdisk image was loaded decrypted by the bootloader, so make
-	 * sure that it is encrypted before accessing it. For SEV the
-	 * ramdisk will already be encrypted, so only do this for SME.
-	 */
-	if (sme_active())
-		sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
 	initrd_start = 0;
 
 	mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
 	case INTEL_FAM6_KABYLAKE_DESKTOP:
 		crystal_khz = 24000;	/* 24.0 MHz */
 		break;
-	case INTEL_FAM6_SKYLAKE_X:
 	case INTEL_FAM6_ATOM_DENVERTON:
 		crystal_khz = 25000;	/* 25.0 MHz */
 		break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
 		}
 	}
 
+	if (crystal_khz == 0)
+		return 0;
 	/*
 	 * TSC frequency determined by CPUID is a "hardware reported"
 	 * frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	if (cpu_khz != tsc_khz) {
+		pr_info("Detected %lu.%03lu MHz TSC",
+			(unsigned long)tsc_khz / 1000,
+			(unsigned long)tsc_khz % 1000);
+	}
+
 	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
 	tsc_store_and_check_tsc_adjust(true);
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+		u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return;
 
 	/* Fault not from Protection Keys: nothing to do */
-	if (si_code != SEGV_PKUERR)
+	if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
 		return;
 	/*
 	 * force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, pkey);
+	fill_sig_info_pkey(si_signo, si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
 	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
-				 unsigned long end)
+struct sme_populate_pgd_data {
+	void	*pgtable_area;
+	pgd_t	*pgd;
+
+	pmdval_t pmd_flags;
+	pteval_t pte_flags;
+	unsigned long paddr;
+
+	unsigned long vaddr;
+	unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
 {
 	unsigned long pgd_start, pgd_end, pgd_size;
 	pgd_t *pgd_p;
 
-	pgd_start = start & PGDIR_MASK;
-	pgd_end = end & PGDIR_MASK;
+	pgd_start = ppd->vaddr & PGDIR_MASK;
+	pgd_end = ppd->vaddr_end & PGDIR_MASK;
 
-	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
-	pgd_size *= sizeof(pgd_t);
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
 
-	pgd_p = pgd_base + pgd_index(start);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 
 	memset(pgd_p, 0, pgd_size);
 }
 
 #define PGD_FLAGS		_KERNPG_TABLE_NOENC
 #define P4D_FLAGS		_KERNPG_TABLE_NOENC
 #define PUD_FLAGS		_KERNPG_TABLE_NOENC
-#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PMD_FLAGS		_KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE		(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC		PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC		(PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS		(__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC		PTE_FLAGS
+#define PTE_FLAGS_DEC_WP	((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC		(PTE_FLAGS | _PAGE_ENC)
 
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
-				     unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
 {
 	pgd_t *pgd_p;
 	p4d_t *p4d_p;
 	pud_t *pud_p;
 	pmd_t *pmd_p;
 
-	pgd_p = pgd_base + pgd_index(vaddr);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 	if (native_pgd_val(*pgd_p)) {
 		if (IS_ENABLED(CONFIG_X86_5LEVEL))
 			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 		pgd_t pgd;
 
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			p4d_p = pgtable_area;
+			p4d_p = ppd->pgtable_area;
 			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
-			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+			ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
 
 			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
 		} else {
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
 		}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 	}
 
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		p4d_p += p4d_index(vaddr);
+		p4d_p += p4d_index(ppd->vaddr);
 		if (native_p4d_val(*p4d_p)) {
 			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
 		} else {
 			p4d_t p4d;
 
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
 			native_set_p4d(p4d_p, p4d);
 		}
 	}
 
-	pud_p += pud_index(vaddr);
+	pud_p += pud_index(ppd->vaddr);
 	if (native_pud_val(*pud_p)) {
 		if (native_pud_val(*pud_p) & _PAGE_PSE)
-			goto out;
+			return NULL;
 
 		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
 	} else {
 		pud_t pud;
 
-		pmd_p = pgtable_area;
+		pmd_p = ppd->pgtable_area;
 		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
-		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+		ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
 
 		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
 		native_set_pud(pud_p, pud);
 	}
 
-	pmd_p += pmd_index(vaddr);
+	return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
 	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
-		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+		native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
 
-out:
-	return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+	pte_t *pte_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
+	if (native_pmd_val(*pmd_p)) {
+		if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+			return;
+
+		pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pmd_t pmd;
+
+		pte_p = ppd->pgtable_area;
+		memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+		ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+		pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+		native_set_pmd(pmd_p, pmd);
+	}
+
+	pte_p += pte_index(ppd->vaddr);
+	if (!native_pte_val(*pte_p))
+		native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd_large(ppd);
+
+		ppd->vaddr += PMD_PAGE_SIZE;
+		ppd->paddr += PMD_PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd(ppd);
+
+		ppd->vaddr += PAGE_SIZE;
+		ppd->paddr += PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+				   pmdval_t pmd_flags, pteval_t pte_flags)
+{
+	unsigned long vaddr_end;
+
+	ppd->pmd_flags = pmd_flags;
+	ppd->pte_flags = pte_flags;
+
+	/* Save original end value since we modify the struct value */
+	vaddr_end = ppd->vaddr_end;
+
+	/* If start is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+	__sme_map_range_pte(ppd);
+
+	/* Create PMD entries */
+	ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+	__sme_map_range_pmd(ppd);
+
+	/* If end is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = vaddr_end;
+	__sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
 }
 
 static unsigned long __init sme_pgtable_calc(unsigned long len)
 {
-	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long p4d_size, pud_size, pmd_size, pte_size;
 	unsigned long total;
 
 	/*
 	 * Perform a relatively simplistic calculation of the pagetable
-	 * entries that are needed. That mappings will be covered by 2MB
-	 * PMD entries so we can conservatively calculate the required
+	 * entries that are needed. Those mappings will be covered mostly
+	 * by 2MB PMD entries so we can conservatively calculate the required
 	 * number of P4D, PUD and PMD structures needed to perform the
-	 * mappings. Incrementing the count for each covers the case where
-	 * the addresses cross entries.
+	 * mappings.  For mappings that are not 2MB aligned, PTE mappings
+	 * would be needed for the start and end portion of the address range
+	 * that fall outside of the 2MB alignment.  This results in, at most,
+	 * two extra pages to hold PTE entries for each range that is mapped.
+	 * Incrementing the count for each covers the case where the addresses
+	 * cross entries.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	}
 	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
 	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+	pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
 
-	total = p4d_size + pud_size + pmd_size;
+	total = p4d_size + pud_size + pmd_size + pte_size;
 
 	/*
 	 * Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	return total;
 }
 
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
 {
 	unsigned long workarea_start, workarea_end, workarea_len;
 	unsigned long execute_start, execute_end, execute_len;
 	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long initrd_start, initrd_end, initrd_len;
+	struct sme_populate_pgd_data ppd;
 	unsigned long pgtable_area_len;
-	unsigned long paddr, pmd_flags;
 	unsigned long decrypted_base;
-	void *pgtable_area;
-	pgd_t *pgd;
 
 	if (!sme_active())
 		return;
 
 	/*
-	 * Prepare for encrypting the kernel by building new pagetables with
-	 * the necessary attributes needed to encrypt the kernel in place.
+	 * Prepare for encrypting the kernel and initrd by building new
+	 * pagetables with the necessary attributes needed to encrypt the
+	 * kernel in place.
 	 *
 	 * One range of virtual addresses will map the memory occupied
-	 * by the kernel as encrypted.
+	 * by the kernel and initrd as encrypted.
 	 *
 	 * Another range of virtual addresses will map the memory occupied
-	 * by the kernel as decrypted and write-protected.
+	 * by the kernel and initrd as decrypted and write-protected.
 	 *
 	 * The use of write-protect attribute will prevent any of the
 	 * memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
 	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
 	kernel_len = kernel_end - kernel_start;
 
+	initrd_start = 0;
+	initrd_end = 0;
+	initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+		     ((unsigned long)bp->ext_ramdisk_size << 32);
+	if (initrd_len) {
+		initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+			       ((unsigned long)bp->ext_ramdisk_image << 32);
+		initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+		initrd_len = initrd_end - initrd_start;
+	}
+#endif
+
 	/* Set the encryption workarea to be immediately after the kernel */
 	workarea_start = kernel_end;
 
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
 	 */
 	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
 	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+	if (initrd_len)
+		pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
 
 	/* PUDs and PMDs needed in the current pagetables for the workarea */
 	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
 
 	/*
 	 * The total workarea includes the executable encryption area and
-	 * the pagetable area.
+	 * the pagetable area. The start of the workarea is already 2MB
+	 * aligned, align the end of the workarea on a 2MB boundary so that
+	 * we don't try to create/allocate PTE entries from the workarea
+	 * before it is mapped.
 	 */
 	workarea_len = execute_len + pgtable_area_len;
-	workarea_end = workarea_start + workarea_len;
+	workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
 
 	/*
 	 * Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
 	 * pagetables and when the new encrypted and decrypted kernel
 	 * mappings are populated.
 	 */
-	pgtable_area = (void *)execute_end;
+	ppd.pgtable_area = (void *)execute_end;
 
 	/*
 	 * Make sure the current pagetable structure has entries for
 	 * addressing the workarea.
 	 */
-	pgd = (pgd_t *)native_read_cr3_pa();
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = (pgd_t *)native_read_cr3_pa();
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
 
 	/*
 	 * A new pagetable structure is being built to allow for the kernel
-	 * to be encrypted. It starts with an empty PGD that will then be
-	 * populated with new PUDs and PMDs as the encrypted and decrypted
-	 * kernel mappings are created.
+	 * and initrd to be encrypted. It starts with an empty PGD that will
+	 * then be populated with new PUDs and PMDs as the encrypted and
+	 * decrypted kernel mappings are created.
 	 */
-	pgd = pgtable_area;
-	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
-	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
-	/* Add encrypted kernel (identity) mappings */
-	pmd_flags = PMD_FLAGS | _PAGE_ENC;
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = ppd.pgtable_area;
+	memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+	ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
 
 	/*
 	 * A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
 	 * the base of the mapping.
 	 */
 	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	if (initrd_len) {
+		unsigned long check_base;
+
+		check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+		decrypted_base = max(decrypted_base, check_base);
+	}
 	decrypted_base <<= PGDIR_SHIFT;
 
+	/* Add encrypted kernel (identity) mappings */
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start;
+	ppd.vaddr_end = kernel_end;
+	sme_map_range_encrypted(&ppd);
+
 	/* Add decrypted, write-protected kernel (non-identity) mappings */
-	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_map_range_decrypted_wp(&ppd);
+
+	if (initrd_len) {
+		/* Add encrypted initrd (identity) mappings */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start;
+		ppd.vaddr_end = initrd_end;
+		sme_map_range_encrypted(&ppd);
+		/*
+		 * Add decrypted, write-protected initrd (non-identity) mappings
+		 */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_map_range_decrypted_wp(&ppd);
 	}
 
 	/* Add decrypted workarea mappings to both kernel mappings */
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
-
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
+
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_map_range_decrypted(&ppd);
 
 	/* Perform the encryption */
 	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
-			    kernel_len, workarea_start, (unsigned long)pgd);
+			    kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+	if (initrd_len)
+		sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+				    initrd_len, workarea_start,
+				    (unsigned long)ppd.pgd);
 
 	/*
 	 * At this point we are running encrypted. Remove the mappings for
 	 * the decrypted areas - all that is needed for this is to remove
 	 * the PGD entry/entries.
 	 */
-	sme_clear_pgd(pgd, kernel_start + decrypted_base,
-		      kernel_end + decrypted_base);
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_clear_pgd(&ppd);
+
+	if (initrd_len) {
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_clear_pgd(&ppd);
+	}
 
-	sme_clear_pgd(pgd, workarea_start + decrypted_base,
-		      workarea_end + decrypted_base);
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_clear_pgd(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
 
 	/*
 	 * Entry parameters:
-	 *   RDI - virtual address for the encrypted kernel mapping
-	 *   RSI - virtual address for the decrypted kernel mapping
-	 *   RDX - length of kernel
+	 *   RDI - virtual address for the encrypted mapping
+	 *   RSI - virtual address for the decrypted mapping
+	 *   RDX - length to encrypt
 	 *   RCX - virtual address of the encryption workarea, including:
 	 *     - stack page (PAGE_SIZE)
 	 *     - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
 	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */
 
 	push	%r12
-	movq	%rdi, %r10		/* Encrypted kernel */
-	movq	%rsi, %r11		/* Decrypted kernel */
-	movq	%rdx, %r12		/* Kernel length */
+	movq	%rdi, %r10		/* Encrypted area */
+	movq	%rsi, %r11		/* Decrypted area */
+	movq	%rdx, %r12		/* Area length */
 
 	/* Copy encryption routine into the workarea */
 	movq	%rax, %rdi		/* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
 	rep	movsb
 
 	/* Setup registers for call */
-	movq	%r10, %rdi		/* Encrypted kernel */
-	movq	%r11, %rsi		/* Decrypted kernel */
+	movq	%r10, %rdi		/* Encrypted area */
+	movq	%r11, %rsi		/* Decrypted area */
 	movq	%r8, %rdx		/* Pagetables used for encryption */
-	movq	%r12, %rcx		/* Kernel length */
+	movq	%r12, %rcx		/* Area length */
 	movq	%rax, %r8		/* Workarea encryption routine */
 	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */
 
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
 
 ENTRY(__enc_copy)
 /*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
  * This routine must be run outside of the kernel proper since
  * the kernel will be encrypted during the process. So this
  * routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
  * during execution.
  *
  * On entry the registers must be:
- *   RDI - virtual address for the encrypted kernel mapping
- *   RSI - virtual address for the decrypted kernel mapping
+ *   RDI - virtual address for the encrypted mapping
+ *   RSI - virtual address for the decrypted mapping
  *   RDX - address of the pagetables to use for encryption
- *   RCX - length of kernel
+ *   RCX - length of area
  *   R8  - intermediate copy buffer
  *
  *   RAX - points to this routine
  *
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
  */
 	/* Enable the new page tables */
 	mov	%rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
 	orq	$X86_CR4_PGE, %rdx
 	mov	%rdx, %cr4
 
+	push	%r15
+	push	%r12
+
+	movq	%rcx, %r9		/* Save area length */
+	movq	%rdi, %r10		/* Save encrypted area address */
+	movq	%rsi, %r11		/* Save decrypted area address */
+
 	/* Set the PAT register PA5 entry to write-protect */
-	push	%rcx
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	push	%rdx			/* Save original PAT value */
+	mov	%rdx, %r15		/* Save original PAT value */
 	andl	$0xffff00ff, %edx	/* Clear PA5 */
 	orl	$0x00000500, %edx	/* Set PA5 to WP */
 	wrmsr
-	pop	%rdx			/* RDX contains original PAT value */
-	pop	%rcx
-
-	movq	%rcx, %r9		/* Save kernel length */
-	movq	%rdi, %r10		/* Save encrypted kernel address */
-	movq	%rsi, %r11		/* Save decrypted kernel address */
 
 	wbinvd				/* Invalidate any cache entries */
 
-	/* Copy/encrypt 2MB at a time */
+	/* Copy/encrypt up to 2MB at a time */
+	movq	$PMD_PAGE_SIZE, %r12
 1:
-	movq	%r11, %rsi		/* Source - decrypted kernel */
+	cmpq	%r12, %r9
+	jnb	2f
+	movq	%r9, %r12
+
+2:
+	movq	%r11, %rsi		/* Source - decrypted area */
 	movq	%r8, %rdi		/* Dest - intermediate copy buffer */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r12, %rcx
 	rep	movsb
 
 	movq	%r8, %rsi		/* Source - intermediate copy buffer */
-	movq	%r10, %rdi		/* Dest - encrypted kernel */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r10, %rdi		/* Dest - encrypted area */
+	movq	%r12, %rcx
 	rep	movsb
 
-	addq	$PMD_PAGE_SIZE, %r11
-	addq	$PMD_PAGE_SIZE, %r10
-	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
+	addq	%r12, %r11
+	addq	%r12, %r10
+	subq	%r12, %r9		/* Kernel length decrement */
 	jnz	1b			/* Kernel length not zero? */
 
 	/* Restore PAT register */
-	push	%rdx			/* Save original PAT value */
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	pop	%rdx			/* Restore original PAT value */
+	mov	%r15, %rdx		/* Restore original PAT value */
 	wrmsr
 
+	pop	%r12
+	pop	%r15
+
 	ret
 .L__enc_copy_end:
 ENDPROC(__enc_copy)