diff options
| -rw-r--r-- | arch/x86/boot/compressed/head_64.S | 79 | ||||
| -rw-r--r-- | arch/x86/boot/compressed/pgtable_64.c | 14 | ||||
| -rw-r--r-- | arch/x86/include/asm/mmu_context.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/pkeys.h | 18 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/head64.c | 10 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 1 | ||||
| -rw-r--r-- | arch/x86/mm/pkeys.c | 21 | ||||
| -rw-r--r-- | tools/testing/selftests/x86/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/x86/mov_ss_trap.c | 285 | ||||
| -rw-r--r-- | tools/testing/selftests/x86/mpx-mini-test.c | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/x86/pkey-helpers.h | 20 | ||||
| -rw-r--r-- | tools/testing/selftests/x86/protection_keys.c | 254 |
13 files changed, 585 insertions, 129 deletions
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fca012baba19..8169e8b7a4dc 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
| @@ -306,6 +306,25 @@ ENTRY(startup_64) | |||
| 306 | leaq boot_stack_end(%rbx), %rsp | 306 | leaq boot_stack_end(%rbx), %rsp |
| 307 | 307 | ||
| 308 | /* | 308 | /* |
| 309 | * paging_prepare() and cleanup_trampoline() below can have GOT | ||
| 310 | * references. Adjust the table with the address we are running at. | ||
| 311 | * | ||
| 312 | * Zero RAX for adjust_got: the GOT was not adjusted before; | ||
| 313 | * there's no adjustment to undo. | ||
| 314 | */ | ||
| 315 | xorq %rax, %rax | ||
| 316 | |||
| 317 | /* | ||
| 318 | * Calculate the address the binary is loaded at and use it as | ||
| 319 | * a GOT adjustment. | ||
| 320 | */ | ||
| 321 | call 1f | ||
| 322 | 1: popq %rdi | ||
| 323 | subq $1b, %rdi | ||
| 324 | |||
| 325 | call adjust_got | ||
| 326 | |||
| 327 | /* | ||
| 309 | * At this point we are in long mode with 4-level paging enabled, | 328 | * At this point we are in long mode with 4-level paging enabled, |
| 310 | * but we might want to enable 5-level paging or vice versa. | 329 | * but we might want to enable 5-level paging or vice versa. |
| 311 | * | 330 | * |
| @@ -370,10 +389,14 @@ trampoline_return: | |||
| 370 | /* | 389 | /* |
| 371 | * cleanup_trampoline() would restore trampoline memory. | 390 | * cleanup_trampoline() would restore trampoline memory. |
| 372 | * | 391 | * |
| 392 | * RDI is the address of the page table to use instead of the page table | ||
| 393 | * in trampoline memory (if required). | ||
| 394 | * | ||
| 373 | * RSI holds real mode data and needs to be preserved across | 395 | * RSI holds real mode data and needs to be preserved across |
| 374 | * this function call. | 396 | * this function call. |
| 375 | */ | 397 | */ |
| 376 | pushq %rsi | 398 | pushq %rsi |
| 399 | leaq top_pgtable(%rbx), %rdi | ||
| 377 | call cleanup_trampoline | 400 | call cleanup_trampoline |
| 378 | popq %rsi | 401 | popq %rsi |
| 379 | 402 | ||
| @@ -381,6 +404,21 @@ trampoline_return: | |||
| 381 | pushq $0 | 404 | pushq $0 |
| 382 | popfq | 405 | popfq |
| 383 | 406 | ||
| 407 | /* | ||
| 408 | * Previously we've adjusted the GOT with the address the binary was | ||
| 409 | * loaded at. Now we need to re-adjust for the relocation address. | ||
| 410 | * | ||
| 411 | * Calculate the address the binary is loaded at, so that we can | ||
| 412 | * undo the previous GOT adjustment. | ||
| 413 | */ | ||
| 414 | call 1f | ||
| 415 | 1: popq %rax | ||
| 416 | subq $1b, %rax | ||
| 417 | |||
| 418 | /* The new adjustment is the relocation address */ | ||
| 419 | movq %rbx, %rdi | ||
| 420 | call adjust_got | ||
| 421 | |||
| 384 | /* | 422 | /* |
| 385 | * Copy the compressed kernel to the end of our buffer | 423 | * Copy the compressed kernel to the end of our buffer |
| 386 | * where decompression in place becomes safe. | 424 | * where decompression in place becomes safe. |
| @@ -482,19 +520,6 @@ relocated: | |||
| 482 | rep stosq | 520 | rep stosq |
| 483 | 521 | ||
| 484 | /* | 522 | /* |
| 485 | * Adjust our own GOT | ||
| 486 | */ | ||
| 487 | leaq _got(%rip), %rdx | ||
| 488 | leaq _egot(%rip), %rcx | ||
| 489 | 1: | ||
| 490 | cmpq %rcx, %rdx | ||
| 491 | jae 2f | ||
| 492 | addq %rbx, (%rdx) | ||
| 493 | addq $8, %rdx | ||
| 494 | jmp 1b | ||
| 495 | 2: | ||
| 496 | |||
| 497 | /* | ||
| 498 | * Do the extraction, and jump to the new kernel.. | 523 | * Do the extraction, and jump to the new kernel.. |
| 499 | */ | 524 | */ |
| 500 | pushq %rsi /* Save the real mode argument */ | 525 | pushq %rsi /* Save the real mode argument */ |
| @@ -512,6 +537,27 @@ relocated: | |||
| 512 | */ | 537 | */ |
| 513 | jmp *%rax | 538 | jmp *%rax |
| 514 | 539 | ||
| 540 | /* | ||
| 541 | * Adjust the global offset table | ||
| 542 | * | ||
| 543 | * RAX is the previous adjustment of the table to undo (use 0 if it's the | ||
| 544 | * first time we touch GOT). | ||
| 545 | * RDI is the new adjustment to apply. | ||
| 546 | */ | ||
| 547 | adjust_got: | ||
| 548 | /* Walk through the GOT adding the address to the entries */ | ||
| 549 | leaq _got(%rip), %rdx | ||
| 550 | leaq _egot(%rip), %rcx | ||
| 551 | 1: | ||
| 552 | cmpq %rcx, %rdx | ||
| 553 | jae 2f | ||
| 554 | subq %rax, (%rdx) /* Undo previous adjustment */ | ||
| 555 | addq %rdi, (%rdx) /* Apply the new adjustment */ | ||
| 556 | addq $8, %rdx | ||
| 557 | jmp 1b | ||
| 558 | 2: | ||
| 559 | ret | ||
| 560 | |||
| 515 | .code32 | 561 | .code32 |
| 516 | /* | 562 | /* |
| 517 | * This is the 32-bit trampoline that will be copied over to low memory. | 563 | * This is the 32-bit trampoline that will be copied over to low memory. |
| @@ -649,3 +695,10 @@ boot_stack_end: | |||
| 649 | .balign 4096 | 695 | .balign 4096 |
| 650 | pgtable: | 696 | pgtable: |
| 651 | .fill BOOT_PGT_SIZE, 1, 0 | 697 | .fill BOOT_PGT_SIZE, 1, 0 |
| 698 | |||
| 699 | /* | ||
| 700 | * This page table is going to be used instead of the page table in the | ||
| 701 | * trampoline memory. | ||
| 702 | */ | ||
| 703 | top_pgtable: | ||
| 704 | .fill PAGE_SIZE, 1, 0 | ||
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 32af1cbcd903..a362fa0b849c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c | |||
| @@ -23,14 +23,6 @@ struct paging_config { | |||
| 23 | static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; | 23 | static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; |
| 24 | 24 | ||
| 25 | /* | 25 | /* |
| 26 | * The page table is going to be used instead of page table in the trampoline | ||
| 27 | * memory. | ||
| 28 | * | ||
| 29 | * It must not be in BSS as BSS is cleared after cleanup_trampoline(). | ||
| 30 | */ | ||
| 31 | static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Trampoline address will be printed by extract_kernel() for debugging | 26 | * Trampoline address will be printed by extract_kernel() for debugging |
| 35 | * purposes. | 27 | * purposes. |
| 36 | * | 28 | * |
| @@ -134,7 +126,7 @@ out: | |||
| 134 | return paging_config; | 126 | return paging_config; |
| 135 | } | 127 | } |
| 136 | 128 | ||
| 137 | void cleanup_trampoline(void) | 129 | void cleanup_trampoline(void *pgtable) |
| 138 | { | 130 | { |
| 139 | void *trampoline_pgtable; | 131 | void *trampoline_pgtable; |
| 140 | 132 | ||
| @@ -145,8 +137,8 @@ void cleanup_trampoline(void) | |||
| 145 | * if it's there. | 137 | * if it's there. |
| 146 | */ | 138 | */ |
| 147 | if ((void *)__native_read_cr3() == trampoline_pgtable) { | 139 | if ((void *)__native_read_cr3() == trampoline_pgtable) { |
| 148 | memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); | 140 | memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); |
| 149 | native_write_cr3((unsigned long)top_pgtable); | 141 | native_write_cr3((unsigned long)pgtable); |
| 150 | } | 142 | } |
| 151 | 143 | ||
| 152 | /* Restore trampoline memory */ | 144 | /* Restore trampoline memory */ |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 57e3785d0d26..cf9911b5a53c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
| @@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk, | |||
| 193 | 193 | ||
| 194 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS | 194 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS |
| 195 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { | 195 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { |
| 196 | /* pkey 0 is the default and always allocated */ | 196 | /* pkey 0 is the default and allocated implicitly */ |
| 197 | mm->context.pkey_allocation_map = 0x1; | 197 | mm->context.pkey_allocation_map = 0x1; |
| 198 | /* -1 means unallocated or invalid */ | 198 | /* -1 means unallocated or invalid */ |
| 199 | mm->context.execute_only_pkey = -1; | 199 | mm->context.execute_only_pkey = -1; |
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..851c04b7a092 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | #ifndef _ASM_X86_PKEYS_H | 2 | #ifndef _ASM_X86_PKEYS_H |
| 3 | #define _ASM_X86_PKEYS_H | 3 | #define _ASM_X86_PKEYS_H |
| 4 | 4 | ||
| 5 | #define ARCH_DEFAULT_PKEY 0 | ||
| 6 | |||
| 5 | #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) | 7 | #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) |
| 6 | 8 | ||
| 7 | extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | 9 | extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, |
| @@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); | |||
| 15 | static inline int execute_only_pkey(struct mm_struct *mm) | 17 | static inline int execute_only_pkey(struct mm_struct *mm) |
| 16 | { | 18 | { |
| 17 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 19 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
| 18 | return 0; | 20 | return ARCH_DEFAULT_PKEY; |
| 19 | 21 | ||
| 20 | return __execute_only_pkey(mm); | 22 | return __execute_only_pkey(mm); |
| 21 | } | 23 | } |
| @@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) | |||
| 49 | { | 51 | { |
| 50 | /* | 52 | /* |
| 51 | * "Allocated" pkeys are those that have been returned | 53 | * "Allocated" pkeys are those that have been returned |
| 52 | * from pkey_alloc(). pkey 0 is special, and never | 54 | * from pkey_alloc() or pkey 0 which is allocated |
| 53 | * returned from pkey_alloc(). | 55 | * implicitly when the mm is created. |
| 54 | */ | 56 | */ |
| 55 | if (pkey <= 0) | 57 | if (pkey < 0) |
| 56 | return false; | 58 | return false; |
| 57 | if (pkey >= arch_max_pkey()) | 59 | if (pkey >= arch_max_pkey()) |
| 58 | return false; | 60 | return false; |
| 61 | /* | ||
| 62 | * The exec-only pkey is set in the allocation map, but | ||
| 63 | * is not available to any of the user interfaces like | ||
| 64 | * mprotect_pkey(). | ||
| 65 | */ | ||
| 66 | if (pkey == mm->context.execute_only_pkey) | ||
| 67 | return false; | ||
| 68 | |||
| 59 | return mm_pkey_allocation_map(mm) & (1U << pkey); | 69 | return mm_pkey_allocation_map(mm) & (1U << pkey); |
| 60 | } | 70 | } |
| 61 | 71 | ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8b04234e010b..7685444a106b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
| @@ -116,6 +116,7 @@ static void init_x2apic_ldr(void) | |||
| 116 | goto update; | 116 | goto update; |
| 117 | } | 117 | } |
| 118 | cmsk = cluster_hotplug_mask; | 118 | cmsk = cluster_hotplug_mask; |
| 119 | cmsk->clusterid = cluster; | ||
| 119 | cluster_hotplug_mask = NULL; | 120 | cluster_hotplug_mask = NULL; |
| 120 | update: | 121 | update: |
| 121 | this_cpu_write(cluster_masks, cmsk); | 122 | this_cpu_write(cluster_masks, cmsk); |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0c408f8c4ed4..2d29e47c056e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr) | |||
| 104 | } | 104 | } |
| 105 | #endif | 105 | #endif |
| 106 | 106 | ||
| 107 | /* Code in __startup_64() can be relocated during execution, but the compiler | ||
| 108 | * doesn't have to generate PC-relative relocations when accessing globals from | ||
| 109 | * that function. Clang actually does not generate them, which leads to | ||
| 110 | * boot-time crashes. To work around this problem, every global pointer must | ||
| 111 | * be adjusted using fixup_pointer(). | ||
| 112 | */ | ||
| 107 | unsigned long __head __startup_64(unsigned long physaddr, | 113 | unsigned long __head __startup_64(unsigned long physaddr, |
| 108 | struct boot_params *bp) | 114 | struct boot_params *bp) |
| 109 | { | 115 | { |
| @@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
| 113 | p4dval_t *p4d; | 119 | p4dval_t *p4d; |
| 114 | pudval_t *pud; | 120 | pudval_t *pud; |
| 115 | pmdval_t *pmd, pmd_entry; | 121 | pmdval_t *pmd, pmd_entry; |
| 122 | pteval_t *mask_ptr; | ||
| 116 | bool la57; | 123 | bool la57; |
| 117 | int i; | 124 | int i; |
| 118 | unsigned int *next_pgt_ptr; | 125 | unsigned int *next_pgt_ptr; |
| @@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
| 196 | 203 | ||
| 197 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; | 204 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; |
| 198 | /* Filter out unsupported __PAGE_KERNEL_* bits: */ | 205 | /* Filter out unsupported __PAGE_KERNEL_* bits: */ |
| 199 | pmd_entry &= __supported_pte_mask; | 206 | mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); |
| 207 | pmd_entry &= *mask_ptr; | ||
| 200 | pmd_entry += sme_get_me_mask(); | 208 | pmd_entry += sme_get_me_mask(); |
| 201 | pmd_entry += physaddr; | 209 | pmd_entry += physaddr; |
| 202 | 210 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4b100fe0f508..12bb445fb98d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -542,6 +542,7 @@ void set_personality_64bit(void) | |||
| 542 | clear_thread_flag(TIF_X32); | 542 | clear_thread_flag(TIF_X32); |
| 543 | /* Pretend that this comes from a 64bit execve */ | 543 | /* Pretend that this comes from a 64bit execve */ |
| 544 | task_pt_regs(current)->orig_ax = __NR_execve; | 544 | task_pt_regs(current)->orig_ax = __NR_execve; |
| 545 | current_thread_info()->status &= ~TS_COMPAT; | ||
| 545 | 546 | ||
| 546 | /* Ensure the corresponding mm is not marked. */ | 547 | /* Ensure the corresponding mm is not marked. */ |
| 547 | if (current->mm) | 548 | if (current->mm) |
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index d7bc0eea20a5..6e98e0a7c923 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c | |||
| @@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey | |||
| 94 | */ | 94 | */ |
| 95 | if (pkey != -1) | 95 | if (pkey != -1) |
| 96 | return pkey; | 96 | return pkey; |
| 97 | /* | 97 | |
| 98 | * Look for a protection-key-drive execute-only mapping | ||
| 99 | * which is now being given permissions that are not | ||
| 100 | * execute-only. Move it back to the default pkey. | ||
| 101 | */ | ||
| 102 | if (vma_is_pkey_exec_only(vma) && | ||
| 103 | (prot & (PROT_READ|PROT_WRITE))) { | ||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | /* | 98 | /* |
| 107 | * The mapping is execute-only. Go try to get the | 99 | * The mapping is execute-only. Go try to get the |
| 108 | * execute-only protection key. If we fail to do that, | 100 | * execute-only protection key. If we fail to do that, |
| 109 | * fall through as if we do not have execute-only | 101 | * fall through as if we do not have execute-only |
| 110 | * support. | 102 | * support in this mm. |
| 111 | */ | 103 | */ |
| 112 | if (prot == PROT_EXEC) { | 104 | if (prot == PROT_EXEC) { |
| 113 | pkey = execute_only_pkey(vma->vm_mm); | 105 | pkey = execute_only_pkey(vma->vm_mm); |
| 114 | if (pkey > 0) | 106 | if (pkey > 0) |
| 115 | return pkey; | 107 | return pkey; |
| 108 | } else if (vma_is_pkey_exec_only(vma)) { | ||
| 109 | /* | ||
| 110 | * Protections are *not* PROT_EXEC, but the mapping | ||
| 111 | * is using the exec-only pkey. This mapping was | ||
| 112 | * PROT_EXEC and will no longer be. Move back to | ||
| 113 | * the default pkey. | ||
| 114 | */ | ||
| 115 | return ARCH_DEFAULT_PKEY; | ||
| 116 | } | 116 | } |
| 117 | |||
| 117 | /* | 118 | /* |
| 118 | * This is a vanilla, non-pkey mprotect (or we failed to | 119 | * This is a vanilla, non-pkey mprotect (or we failed to |
| 119 | * setup execute-only), inherit the pkey from the VMA we | 120 | * setup execute-only), inherit the pkey from the VMA we |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
| @@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) | |||
| 11 | 11 | ||
| 12 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ | 12 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ |
| 13 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ | 13 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ |
| 14 | protection_keys test_vdso test_vsyscall | 14 | protection_keys test_vdso test_vsyscall mov_ss_trap |
| 15 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ | 15 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ |
| 16 | test_FCMOV test_FCOMI test_FISTTP \ | 16 | test_FCMOV test_FCOMI test_FISTTP \ |
| 17 | vdso_restorer | 17 | vdso_restorer |
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c | |||
| @@ -0,0 +1,285 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | /* | ||
| 3 | * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS | ||
| 4 | * | ||
| 5 | * This does MOV SS from a watchpointed address followed by various | ||
| 6 | * types of kernel entries. A MOV SS that hits a watchpoint will queue | ||
| 7 | * up a #DB trap but will not actually deliver that trap. The trap | ||
| 8 | * will be delivered after the next instruction instead. The CPU's logic | ||
| 9 | * seems to be: | ||
| 10 | * | ||
| 11 | * - Any fault: drop the pending #DB trap. | ||
| 12 | * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then | ||
| 13 | * deliver #DB. | ||
| 14 | * - ICEBP: enter the kernel but do not deliver the watchpoint trap | ||
| 15 | * - breakpoint: only one #DB is delivered (phew!) | ||
| 16 | * | ||
| 17 | * There are plenty of ways for a kernel to handle this incorrectly. This | ||
| 18 | * test tries to exercise all the cases. | ||
| 19 | * | ||
| 20 | * This should mostly cover CVE-2018-1087 and CVE-2018-8897. | ||
| 21 | */ | ||
| 22 | #define _GNU_SOURCE | ||
| 23 | |||
| 24 | #include <stdlib.h> | ||
| 25 | #include <sys/ptrace.h> | ||
| 26 | #include <sys/types.h> | ||
| 27 | #include <sys/wait.h> | ||
| 28 | #include <sys/user.h> | ||
| 29 | #include <sys/syscall.h> | ||
| 30 | #include <unistd.h> | ||
| 31 | #include <errno.h> | ||
| 32 | #include <stddef.h> | ||
| 33 | #include <stdio.h> | ||
| 34 | #include <err.h> | ||
| 35 | #include <string.h> | ||
| 36 | #include <setjmp.h> | ||
| 37 | #include <sys/prctl.h> | ||
| 38 | |||
| 39 | #define X86_EFLAGS_RF (1UL << 16) | ||
| 40 | |||
| 41 | #if __x86_64__ | ||
| 42 | # define REG_IP REG_RIP | ||
| 43 | #else | ||
| 44 | # define REG_IP REG_EIP | ||
| 45 | #endif | ||
| 46 | |||
| 47 | unsigned short ss; | ||
| 48 | extern unsigned char breakpoint_insn[]; | ||
| 49 | sigjmp_buf jmpbuf; | ||
| 50 | static unsigned char altstack_data[SIGSTKSZ]; | ||
| 51 | |||
| 52 | static void enable_watchpoint(void) | ||
| 53 | { | ||
| 54 | pid_t parent = getpid(); | ||
| 55 | int status; | ||
| 56 | |||
| 57 | pid_t child = fork(); | ||
| 58 | if (child < 0) | ||
| 59 | err(1, "fork"); | ||
| 60 | |||
| 61 | if (child) { | ||
| 62 | if (waitpid(child, &status, 0) != child) | ||
| 63 | err(1, "waitpid for child"); | ||
| 64 | } else { | ||
| 65 | unsigned long dr0, dr1, dr7; | ||
| 66 | |||
| 67 | dr0 = (unsigned long)&ss; | ||
| 68 | dr1 = (unsigned long)breakpoint_insn; | ||
| 69 | dr7 = ((1UL << 1) | /* G0 */ | ||
| 70 | (3UL << 16) | /* RW0 = read or write */ | ||
| 71 | (1UL << 18) | /* LEN0 = 2 bytes */ | ||
| 72 | (1UL << 3)); /* G1, RW1 = insn */ | ||
| 73 | |||
| 74 | if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) | ||
| 75 | err(1, "PTRACE_ATTACH"); | ||
| 76 | |||
| 77 | if (waitpid(parent, &status, 0) != parent) | ||
| 78 | err(1, "waitpid for parent"); | ||
| 79 | |||
| 80 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) | ||
| 81 | err(1, "PTRACE_POKEUSER DR0"); | ||
| 82 | |||
| 83 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) | ||
| 84 | err(1, "PTRACE_POKEUSER DR1"); | ||
| 85 | |||
| 86 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) | ||
| 87 | err(1, "PTRACE_POKEUSER DR7"); | ||
| 88 | |||
| 89 | printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); | ||
| 90 | |||
| 91 | if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) | ||
| 92 | err(1, "PTRACE_DETACH"); | ||
| 93 | |||
| 94 | exit(0); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | ||
| 99 | int flags) | ||
| 100 | { | ||
| 101 | struct sigaction sa; | ||
| 102 | memset(&sa, 0, sizeof(sa)); | ||
| 103 | sa.sa_sigaction = handler; | ||
| 104 | sa.sa_flags = SA_SIGINFO | flags; | ||
| 105 | sigemptyset(&sa.sa_mask); | ||
| 106 | if (sigaction(sig, &sa, 0)) | ||
| 107 | err(1, "sigaction"); | ||
| 108 | } | ||
| 109 | |||
| 110 | static char const * const signames[] = { | ||
| 111 | [SIGSEGV] = "SIGSEGV", | ||
| 112 | [SIGBUS] = "SIGBUS", | ||
| 113 | [SIGTRAP] = "SIGTRAP", | ||
| 114 | [SIGILL] = "SIGILL", | ||
| 115 | }; | ||
| 116 | |||
| 117 | static void sigtrap(int sig, siginfo_t *si, void *ctx_void) | ||
| 118 | { | ||
| 119 | ucontext_t *ctx = ctx_void; | ||
| 120 | |||
| 121 | printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", | ||
| 122 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP], | ||
| 123 | !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); | ||
| 124 | } | ||
| 125 | |||
| 126 | static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) | ||
| 127 | { | ||
| 128 | ucontext_t *ctx = ctx_void; | ||
| 129 | |||
| 130 | printf("\tGot %s with RIP=%lx\n", signames[sig], | ||
| 131 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); | ||
| 132 | } | ||
| 133 | |||
| 134 | static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) | ||
| 135 | { | ||
| 136 | ucontext_t *ctx = ctx_void; | ||
| 137 | |||
| 138 | printf("\tGot %s with RIP=%lx\n", signames[sig], | ||
| 139 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); | ||
| 140 | |||
| 141 | siglongjmp(jmpbuf, 1); | ||
| 142 | } | ||
| 143 | |||
| 144 | int main() | ||
| 145 | { | ||
| 146 | unsigned long nr; | ||
| 147 | |||
| 148 | asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); | ||
| 149 | printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); | ||
| 150 | |||
| 151 | if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) | ||
| 152 | printf("\tPR_SET_PTRACER_ANY succeeded\n"); | ||
| 153 | |||
| 154 | printf("\tSet up a watchpoint\n"); | ||
| 155 | sethandler(SIGTRAP, sigtrap, 0); | ||
| 156 | enable_watchpoint(); | ||
| 157 | |||
| 158 | printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); | ||
| 159 | asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); | ||
| 160 | |||
| 161 | printf("[RUN]\tMOV SS; INT3\n"); | ||
| 162 | asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); | ||
| 163 | |||
| 164 | printf("[RUN]\tMOV SS; INT 3\n"); | ||
| 165 | asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); | ||
| 166 | |||
| 167 | printf("[RUN]\tMOV SS; CS CS INT3\n"); | ||
| 168 | asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); | ||
| 169 | |||
| 170 | printf("[RUN]\tMOV SS; CSx14 INT3\n"); | ||
| 171 | asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); | ||
| 172 | |||
| 173 | printf("[RUN]\tMOV SS; INT 4\n"); | ||
| 174 | sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); | ||
| 175 | asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); | ||
| 176 | |||
| 177 | #ifdef __i386__ | ||
| 178 | printf("[RUN]\tMOV SS; INTO\n"); | ||
| 179 | sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); | ||
| 180 | nr = -1; | ||
| 181 | asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" | ||
| 182 | : [tmp] "+r" (nr) : [ss] "m" (ss)); | ||
| 183 | #endif | ||
| 184 | |||
| 185 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 186 | printf("[RUN]\tMOV SS; ICEBP\n"); | ||
| 187 | |||
| 188 | /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ | ||
| 189 | sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); | ||
| 190 | |||
| 191 | asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); | ||
| 192 | } | ||
| 193 | |||
| 194 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 195 | printf("[RUN]\tMOV SS; CLI\n"); | ||
| 196 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
| 197 | asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); | ||
| 198 | } | ||
| 199 | |||
| 200 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 201 | printf("[RUN]\tMOV SS; #PF\n"); | ||
| 202 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
| 203 | asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" | ||
| 204 | : [tmp] "=r" (nr) : [ss] "m" (ss)); | ||
| 205 | } | ||
| 206 | |||
| 207 | /* | ||
| 208 | * INT $1: if #DB has DPL=3 and there isn't special handling, | ||
| 209 | * then the kernel will die. | ||
| 210 | */ | ||
| 211 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 212 | printf("[RUN]\tMOV SS; INT 1\n"); | ||
| 213 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
| 214 | asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); | ||
| 215 | } | ||
| 216 | |||
| 217 | #ifdef __x86_64__ | ||
| 218 | /* | ||
| 219 | * In principle, we should test 32-bit SYSCALL as well, but | ||
| 220 | * the calling convention is so unpredictable that it's | ||
| 221 | * not obviously worth the effort. | ||
| 222 | */ | ||
| 223 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 224 | printf("[RUN]\tMOV SS; SYSCALL\n"); | ||
| 225 | sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); | ||
| 226 | nr = SYS_getpid; | ||
| 227 | /* | ||
| 228 | * Toggle the high bit of RSP to make it noncanonical to | ||
| 229 | * strengthen this test on non-SMAP systems. | ||
| 230 | */ | ||
| 231 | asm volatile ("btc $63, %%rsp\n\t" | ||
| 232 | "mov %[ss], %%ss; syscall\n\t" | ||
| 233 | "btc $63, %%rsp" | ||
| 234 | : "+a" (nr) : [ss] "m" (ss) | ||
| 235 | : "rcx" | ||
| 236 | #ifdef __x86_64__ | ||
| 237 | , "r11" | ||
| 238 | #endif | ||
| 239 | ); | ||
| 240 | } | ||
| 241 | #endif | ||
| 242 | |||
| 243 | printf("[RUN]\tMOV SS; breakpointed NOP\n"); | ||
| 244 | asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); | ||
| 245 | |||
| 246 | /* | ||
| 247 | * Invoking SYSENTER directly breaks all the rules. Just handle | ||
| 248 | * the SIGSEGV. | ||
| 249 | */ | ||
| 250 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 251 | printf("[RUN]\tMOV SS; SYSENTER\n"); | ||
| 252 | stack_t stack = { | ||
| 253 | .ss_sp = altstack_data, | ||
| 254 | .ss_size = SIGSTKSZ, | ||
| 255 | }; | ||
| 256 | if (sigaltstack(&stack, NULL) != 0) | ||
| 257 | err(1, "sigaltstack"); | ||
| 258 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); | ||
| 259 | nr = SYS_getpid; | ||
| 260 | asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) | ||
| 261 | : [ss] "m" (ss) : "flags", "rcx" | ||
| 262 | #ifdef __x86_64__ | ||
| 263 | , "r11" | ||
| 264 | #endif | ||
| 265 | ); | ||
| 266 | |||
| 267 | /* We're unreachable here. SYSENTER forgets RIP. */ | ||
| 268 | } | ||
| 269 | |||
| 270 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
| 271 | printf("[RUN]\tMOV SS; INT $0x80\n"); | ||
| 272 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
| 273 | nr = 20; /* compat getpid */ | ||
| 274 | asm volatile ("mov %[ss], %%ss; int $0x80" | ||
| 275 | : "+a" (nr) : [ss] "m" (ss) | ||
| 276 | : "flags" | ||
| 277 | #ifdef __x86_64__ | ||
| 278 | , "r8", "r9", "r10", "r11" | ||
| 279 | #endif | ||
| 280 | ); | ||
| 281 | } | ||
| 282 | |||
| 283 | printf("[OK]\tI aten't dead\n"); | ||
| 284 | return 0; | ||
| 285 | } | ||
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c | |||
| @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; | |||
| 368 | uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ | 368 | uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ |
| 369 | unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; | 369 | unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; |
| 370 | 370 | ||
| 371 | /* Failed address bound checks: */ | ||
| 372 | #ifndef SEGV_BNDERR | ||
| 373 | # define SEGV_BNDERR 3 | ||
| 374 | #endif | ||
| 375 | |||
| 371 | /* | 376 | /* |
| 372 | * The kernel is supposed to provide some information about the bounds | 377 | * The kernel is supposed to provide some information about the bounds |
| 373 | * exception in the siginfo. It should match what we have in the bounds | 378 | * exception in the siginfo. It should match what we have in the bounds |
| @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) | |||
| 419 | br_count++; | 424 | br_count++; |
| 420 | dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); | 425 | dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); |
| 421 | 426 | ||
| 422 | #define SEGV_BNDERR 3 /* failed address bound checks */ | ||
| 423 | |||
| 424 | dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", | 427 | dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", |
| 425 | status, ip, br_reason); | 428 | status, ip, br_reason); |
| 426 | dprintf2("si_signo: %d\n", si->si_signo); | 429 | dprintf2("si_signo: %d\n", si->si_signo); |
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h | |||
| @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) | |||
| 26 | { | 26 | { |
| 27 | va_list ap; | 27 | va_list ap; |
| 28 | 28 | ||
| 29 | va_start(ap, format); | ||
| 30 | if (!dprint_in_signal) { | 29 | if (!dprint_in_signal) { |
| 30 | va_start(ap, format); | ||
| 31 | vprintf(format, ap); | 31 | vprintf(format, ap); |
| 32 | va_end(ap); | ||
| 32 | } else { | 33 | } else { |
| 33 | int ret; | 34 | int ret; |
| 34 | int len = vsnprintf(dprint_in_signal_buffer, | ||
| 35 | DPRINT_IN_SIGNAL_BUF_SIZE, | ||
| 36 | format, ap); | ||
| 37 | /* | 35 | /* |
| 38 | * len is amount that would have been printed, | 36 | * No printf() functions are signal-safe. |
| 39 | * but actual write is truncated at BUF_SIZE. | 37 | * They deadlock easily. Write the format |
| 38 | * string to get some output, even if | ||
| 39 | * incomplete. | ||
| 40 | */ | 40 | */ |
| 41 | if (len > DPRINT_IN_SIGNAL_BUF_SIZE) | 41 | ret = write(1, format, strlen(format)); |
| 42 | len = DPRINT_IN_SIGNAL_BUF_SIZE; | ||
| 43 | ret = write(1, dprint_in_signal_buffer, len); | ||
| 44 | if (ret < 0) | 42 | if (ret < 0) |
| 45 | abort(); | 43 | exit(1); |
| 46 | } | 44 | } |
| 47 | va_end(ap); | ||
| 48 | } | 45 | } |
| 49 | #define dprintf_level(level, args...) do { \ | 46 | #define dprintf_level(level, args...) do { \ |
| 50 | if (level <= DEBUG_LEVEL) \ | 47 | if (level <= DEBUG_LEVEL) \ |
| 51 | sigsafe_printf(args); \ | 48 | sigsafe_printf(args); \ |
| 52 | fflush(NULL); \ | ||
| 53 | } while (0) | 49 | } while (0) |
| 54 | #define dprintf0(args...) dprintf_level(0, args) | 50 | #define dprintf0(args...) dprintf_level(0, args) |
| 55 | #define dprintf1(args...) dprintf_level(1, args) | 51 | #define dprintf1(args...) dprintf_level(1, args) |
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c | |||
| @@ -72,10 +72,9 @@ extern void abort_hooks(void); | |||
| 72 | test_nr, iteration_nr); \ | 72 | test_nr, iteration_nr); \ |
| 73 | dprintf0("errno at assert: %d", errno); \ | 73 | dprintf0("errno at assert: %d", errno); \ |
| 74 | abort_hooks(); \ | 74 | abort_hooks(); \ |
| 75 | assert(condition); \ | 75 | exit(__LINE__); \ |
| 76 | } \ | 76 | } \ |
| 77 | } while (0) | 77 | } while (0) |
| 78 | #define raw_assert(cond) assert(cond) | ||
| 79 | 78 | ||
| 80 | void cat_into_file(char *str, char *file) | 79 | void cat_into_file(char *str, char *file) |
| 81 | { | 80 | { |
| @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) | |||
| 87 | * these need to be raw because they are called under | 86 | * these need to be raw because they are called under |
| 88 | * pkey_assert() | 87 | * pkey_assert() |
| 89 | */ | 88 | */ |
| 90 | raw_assert(fd >= 0); | 89 | if (fd < 0) { |
| 90 | fprintf(stderr, "error opening '%s'\n", str); | ||
| 91 | perror("error: "); | ||
| 92 | exit(__LINE__); | ||
| 93 | } | ||
| 94 | |||
| 91 | ret = write(fd, str, strlen(str)); | 95 | ret = write(fd, str, strlen(str)); |
| 92 | if (ret != strlen(str)) { | 96 | if (ret != strlen(str)) { |
| 93 | perror("write to file failed"); | 97 | perror("write to file failed"); |
| 94 | fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); | 98 | fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); |
| 95 | raw_assert(0); | 99 | exit(__LINE__); |
| 96 | } | 100 | } |
| 97 | close(fd); | 101 | close(fd); |
| 98 | } | 102 | } |
| @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) | |||
| 191 | #ifdef __i386__ | 195 | #ifdef __i386__ |
| 192 | 196 | ||
| 193 | #ifndef SYS_mprotect_key | 197 | #ifndef SYS_mprotect_key |
| 194 | # define SYS_mprotect_key 380 | 198 | # define SYS_mprotect_key 380 |
| 195 | #endif | 199 | #endif |
| 200 | |||
| 196 | #ifndef SYS_pkey_alloc | 201 | #ifndef SYS_pkey_alloc |
| 197 | # define SYS_pkey_alloc 381 | 202 | # define SYS_pkey_alloc 381 |
| 198 | # define SYS_pkey_free 382 | 203 | # define SYS_pkey_free 382 |
| 199 | #endif | 204 | #endif |
| 200 | #define REG_IP_IDX REG_EIP | 205 | |
| 201 | #define si_pkey_offset 0x14 | 206 | #define REG_IP_IDX REG_EIP |
| 207 | #define si_pkey_offset 0x14 | ||
| 202 | 208 | ||
| 203 | #else | 209 | #else |
| 204 | 210 | ||
| 205 | #ifndef SYS_mprotect_key | 211 | #ifndef SYS_mprotect_key |
| 206 | # define SYS_mprotect_key 329 | 212 | # define SYS_mprotect_key 329 |
| 207 | #endif | 213 | #endif |
| 214 | |||
| 208 | #ifndef SYS_pkey_alloc | 215 | #ifndef SYS_pkey_alloc |
| 209 | # define SYS_pkey_alloc 330 | 216 | # define SYS_pkey_alloc 330 |
| 210 | # define SYS_pkey_free 331 | 217 | # define SYS_pkey_free 331 |
| 211 | #endif | 218 | #endif |
| 212 | #define REG_IP_IDX REG_RIP | 219 | |
| 213 | #define si_pkey_offset 0x20 | 220 | #define REG_IP_IDX REG_RIP |
| 221 | #define si_pkey_offset 0x20 | ||
| 214 | 222 | ||
| 215 | #endif | 223 | #endif |
| 216 | 224 | ||
| @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) | |||
| 225 | } | 233 | } |
| 226 | } | 234 | } |
| 227 | 235 | ||
| 228 | #define SEGV_BNDERR 3 /* failed address bound checks */ | 236 | /* Failed address bound checks: */ |
| 229 | #define SEGV_PKUERR 4 | 237 | #ifndef SEGV_BNDERR |
| 238 | # define SEGV_BNDERR 3 | ||
| 239 | #endif | ||
| 240 | |||
| 241 | #ifndef SEGV_PKUERR | ||
| 242 | # define SEGV_PKUERR 4 | ||
| 243 | #endif | ||
| 230 | 244 | ||
| 231 | static char *si_code_str(int si_code) | 245 | static char *si_code_str(int si_code) |
| 232 | { | 246 | { |
| @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
| 289 | dump_mem(pkru_ptr - 128, 256); | 303 | dump_mem(pkru_ptr - 128, 256); |
| 290 | pkey_assert(*pkru_ptr); | 304 | pkey_assert(*pkru_ptr); |
| 291 | 305 | ||
| 292 | si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); | ||
| 293 | dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); | ||
| 294 | dump_mem(si_pkey_ptr - 8, 24); | ||
| 295 | siginfo_pkey = *si_pkey_ptr; | ||
| 296 | pkey_assert(siginfo_pkey < NR_PKEYS); | ||
| 297 | last_si_pkey = siginfo_pkey; | ||
| 298 | |||
| 299 | if ((si->si_code == SEGV_MAPERR) || | 306 | if ((si->si_code == SEGV_MAPERR) || |
| 300 | (si->si_code == SEGV_ACCERR) || | 307 | (si->si_code == SEGV_ACCERR) || |
| 301 | (si->si_code == SEGV_BNDERR)) { | 308 | (si->si_code == SEGV_BNDERR)) { |
| @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
| 303 | exit(4); | 310 | exit(4); |
| 304 | } | 311 | } |
| 305 | 312 | ||
| 313 | si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); | ||
| 314 | dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); | ||
| 315 | dump_mem((u8 *)si_pkey_ptr - 8, 24); | ||
| 316 | siginfo_pkey = *si_pkey_ptr; | ||
| 317 | pkey_assert(siginfo_pkey < NR_PKEYS); | ||
| 318 | last_si_pkey = siginfo_pkey; | ||
| 319 | |||
| 306 | dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); | 320 | dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); |
| 307 | /* need __rdpkru() version so we do not do shadow_pkru checking */ | 321 | /* need __rdpkru() version so we do not do shadow_pkru checking */ |
| 308 | dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); | 322 | dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); |
| @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
| 311 | dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); | 325 | dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); |
| 312 | pkru_faults++; | 326 | pkru_faults++; |
| 313 | dprintf1("<<<<==================================================\n"); | 327 | dprintf1("<<<<==================================================\n"); |
| 314 | return; | ||
| 315 | if (trapno == 14) { | ||
| 316 | fprintf(stderr, | ||
| 317 | "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", | ||
| 318 | trapno, ip); | ||
| 319 | fprintf(stderr, "si_addr %p\n", si->si_addr); | ||
| 320 | fprintf(stderr, "REG_ERR: %lx\n", | ||
| 321 | (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); | ||
| 322 | exit(1); | ||
| 323 | } else { | ||
| 324 | fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); | ||
| 325 | fprintf(stderr, "si_addr %p\n", si->si_addr); | ||
| 326 | fprintf(stderr, "REG_ERR: %lx\n", | ||
| 327 | (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); | ||
| 328 | exit(2); | ||
| 329 | } | ||
| 330 | dprint_in_signal = 0; | 328 | dprint_in_signal = 0; |
| 331 | } | 329 | } |
| 332 | 330 | ||
| @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) | |||
| 393 | return forkret; | 391 | return forkret; |
| 394 | } | 392 | } |
| 395 | 393 | ||
| 396 | #define PKEY_DISABLE_ACCESS 0x1 | 394 | #ifndef PKEY_DISABLE_ACCESS |
| 397 | #define PKEY_DISABLE_WRITE 0x2 | 395 | # define PKEY_DISABLE_ACCESS 0x1 |
| 396 | #endif | ||
| 397 | |||
| 398 | #ifndef PKEY_DISABLE_WRITE | ||
| 399 | # define PKEY_DISABLE_WRITE 0x2 | ||
| 400 | #endif | ||
| 398 | 401 | ||
| 399 | u32 pkey_get(int pkey, unsigned long flags) | 402 | static u32 hw_pkey_get(int pkey, unsigned long flags) |
| 400 | { | 403 | { |
| 401 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); | 404 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); |
| 402 | u32 pkru = __rdpkru(); | 405 | u32 pkru = __rdpkru(); |
| @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) | |||
| 418 | return masked_pkru; | 421 | return masked_pkru; |
| 419 | } | 422 | } |
| 420 | 423 | ||
| 421 | int pkey_set(int pkey, unsigned long rights, unsigned long flags) | 424 | static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) |
| 422 | { | 425 | { |
| 423 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); | 426 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); |
| 424 | u32 old_pkru = __rdpkru(); | 427 | u32 old_pkru = __rdpkru(); |
| @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) | |||
| 452 | pkey, flags); | 455 | pkey, flags); |
| 453 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); | 456 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
| 454 | 457 | ||
| 455 | pkey_rights = pkey_get(pkey, syscall_flags); | 458 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
| 456 | 459 | ||
| 457 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 460 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
| 458 | pkey, pkey, pkey_rights); | 461 | pkey, pkey, pkey_rights); |
| 459 | pkey_assert(pkey_rights >= 0); | 462 | pkey_assert(pkey_rights >= 0); |
| 460 | 463 | ||
| 461 | pkey_rights |= flags; | 464 | pkey_rights |= flags; |
| 462 | 465 | ||
| 463 | ret = pkey_set(pkey, pkey_rights, syscall_flags); | 466 | ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); |
| 464 | assert(!ret); | 467 | assert(!ret); |
| 465 | /* pkru and flags have the same format */ | 468 | /* pkru and flags have the same format */ |
| 466 | shadow_pkru |= flags << (pkey * 2); | 469 | shadow_pkru |= flags << (pkey * 2); |
| @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) | |||
| 468 | 471 | ||
| 469 | pkey_assert(ret >= 0); | 472 | pkey_assert(ret >= 0); |
| 470 | 473 | ||
| 471 | pkey_rights = pkey_get(pkey, syscall_flags); | 474 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
| 472 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 475 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
| 473 | pkey, pkey, pkey_rights); | 476 | pkey, pkey, pkey_rights); |
| 474 | 477 | ||
| 475 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); | 478 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); |
| @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) | |||
| 483 | { | 486 | { |
| 484 | unsigned long syscall_flags = 0; | 487 | unsigned long syscall_flags = 0; |
| 485 | int ret; | 488 | int ret; |
| 486 | int pkey_rights = pkey_get(pkey, syscall_flags); | 489 | int pkey_rights = hw_pkey_get(pkey, syscall_flags); |
| 487 | u32 orig_pkru = rdpkru(); | 490 | u32 orig_pkru = rdpkru(); |
| 488 | 491 | ||
| 489 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); | 492 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
| 490 | 493 | ||
| 491 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 494 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
| 492 | pkey, pkey, pkey_rights); | 495 | pkey, pkey, pkey_rights); |
| 493 | pkey_assert(pkey_rights >= 0); | 496 | pkey_assert(pkey_rights >= 0); |
| 494 | 497 | ||
| 495 | pkey_rights |= flags; | 498 | pkey_rights |= flags; |
| 496 | 499 | ||
| 497 | ret = pkey_set(pkey, pkey_rights, 0); | 500 | ret = hw_pkey_set(pkey, pkey_rights, 0); |
| 498 | /* pkru and flags have the same format */ | 501 | /* pkru and flags have the same format */ |
| 499 | shadow_pkru &= ~(flags << (pkey * 2)); | 502 | shadow_pkru &= ~(flags << (pkey * 2)); |
| 500 | pkey_assert(ret >= 0); | 503 | pkey_assert(ret >= 0); |
| 501 | 504 | ||
| 502 | pkey_rights = pkey_get(pkey, syscall_flags); | 505 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
| 503 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 506 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
| 504 | pkey, pkey, pkey_rights); | 507 | pkey, pkey, pkey_rights); |
| 505 | 508 | ||
| 506 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); | 509 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); |
| @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, | |||
| 674 | struct pkey_malloc_record { | 677 | struct pkey_malloc_record { |
| 675 | void *ptr; | 678 | void *ptr; |
| 676 | long size; | 679 | long size; |
| 680 | int prot; | ||
| 677 | }; | 681 | }; |
| 678 | struct pkey_malloc_record *pkey_malloc_records; | 682 | struct pkey_malloc_record *pkey_malloc_records; |
| 683 | struct pkey_malloc_record *pkey_last_malloc_record; | ||
| 679 | long nr_pkey_malloc_records; | 684 | long nr_pkey_malloc_records; |
| 680 | void record_pkey_malloc(void *ptr, long size) | 685 | void record_pkey_malloc(void *ptr, long size, int prot) |
| 681 | { | 686 | { |
| 682 | long i; | 687 | long i; |
| 683 | struct pkey_malloc_record *rec = NULL; | 688 | struct pkey_malloc_record *rec = NULL; |
| @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) | |||
| 709 | (int)(rec - pkey_malloc_records), rec, ptr, size); | 714 | (int)(rec - pkey_malloc_records), rec, ptr, size); |
| 710 | rec->ptr = ptr; | 715 | rec->ptr = ptr; |
| 711 | rec->size = size; | 716 | rec->size = size; |
| 717 | rec->prot = prot; | ||
| 718 | pkey_last_malloc_record = rec; | ||
| 712 | nr_pkey_malloc_records++; | 719 | nr_pkey_malloc_records++; |
| 713 | } | 720 | } |
| 714 | 721 | ||
| @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) | |||
| 753 | pkey_assert(ptr != (void *)-1); | 760 | pkey_assert(ptr != (void *)-1); |
| 754 | ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); | 761 | ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); |
| 755 | pkey_assert(!ret); | 762 | pkey_assert(!ret); |
| 756 | record_pkey_malloc(ptr, size); | 763 | record_pkey_malloc(ptr, size, prot); |
| 757 | rdpkru(); | 764 | rdpkru(); |
| 758 | 765 | ||
| 759 | dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); | 766 | dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); |
| @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) | |||
| 774 | size = ALIGN_UP(size, HPAGE_SIZE * 2); | 781 | size = ALIGN_UP(size, HPAGE_SIZE * 2); |
| 775 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); | 782 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
| 776 | pkey_assert(ptr != (void *)-1); | 783 | pkey_assert(ptr != (void *)-1); |
| 777 | record_pkey_malloc(ptr, size); | 784 | record_pkey_malloc(ptr, size, prot); |
| 778 | mprotect_pkey(ptr, size, prot, pkey); | 785 | mprotect_pkey(ptr, size, prot, pkey); |
| 779 | 786 | ||
| 780 | dprintf1("unaligned ptr: %p\n", ptr); | 787 | dprintf1("unaligned ptr: %p\n", ptr); |
| @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) | |||
| 847 | pkey_assert(ptr != (void *)-1); | 854 | pkey_assert(ptr != (void *)-1); |
| 848 | mprotect_pkey(ptr, size, prot, pkey); | 855 | mprotect_pkey(ptr, size, prot, pkey); |
| 849 | 856 | ||
| 850 | record_pkey_malloc(ptr, size); | 857 | record_pkey_malloc(ptr, size, prot); |
| 851 | 858 | ||
| 852 | dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); | 859 | dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); |
| 853 | return ptr; | 860 | return ptr; |
| @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) | |||
| 869 | 876 | ||
| 870 | mprotect_pkey(ptr, size, prot, pkey); | 877 | mprotect_pkey(ptr, size, prot, pkey); |
| 871 | 878 | ||
| 872 | record_pkey_malloc(ptr, size); | 879 | record_pkey_malloc(ptr, size, prot); |
| 873 | 880 | ||
| 874 | dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); | 881 | dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); |
| 875 | close(fd); | 882 | close(fd); |
| @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) | |||
| 918 | } | 925 | } |
| 919 | 926 | ||
| 920 | int last_pkru_faults; | 927 | int last_pkru_faults; |
| 928 | #define UNKNOWN_PKEY -2 | ||
| 921 | void expected_pk_fault(int pkey) | 929 | void expected_pk_fault(int pkey) |
| 922 | { | 930 | { |
| 923 | dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", | 931 | dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", |
| 924 | __func__, last_pkru_faults, pkru_faults); | 932 | __func__, last_pkru_faults, pkru_faults); |
| 925 | dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); | 933 | dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); |
| 926 | pkey_assert(last_pkru_faults + 1 == pkru_faults); | 934 | pkey_assert(last_pkru_faults + 1 == pkru_faults); |
| 927 | pkey_assert(last_si_pkey == pkey); | 935 | |
| 936 | /* | ||
| 937 | * For exec-only memory, we do not know the pkey in | ||
| 938 | * advance, so skip this check. | ||
| 939 | */ | ||
| 940 | if (pkey != UNKNOWN_PKEY) | ||
| 941 | pkey_assert(last_si_pkey == pkey); | ||
| 942 | |||
| 928 | /* | 943 | /* |
| 929 | * The signal handler should have cleared out PKRU to let the | 944 | * The signal handler should have cleared out PKRU to let the |
| 930 | * test program continue. We now have to restore it. | 945 | * test program continue. We now have to restore it. |
| @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) | |||
| 939 | last_si_pkey = -1; | 954 | last_si_pkey = -1; |
| 940 | } | 955 | } |
| 941 | 956 | ||
| 942 | void do_not_expect_pk_fault(void) | 957 | #define do_not_expect_pk_fault(msg) do { \ |
| 943 | { | 958 | if (last_pkru_faults != pkru_faults) \ |
| 944 | pkey_assert(last_pkru_faults == pkru_faults); | 959 | dprintf0("unexpected PK fault: %s\n", msg); \ |
| 945 | } | 960 | pkey_assert(last_pkru_faults == pkru_faults); \ |
| 961 | } while (0) | ||
| 946 | 962 | ||
| 947 | int test_fds[10] = { -1 }; | 963 | int test_fds[10] = { -1 }; |
| 948 | int nr_test_fds; | 964 | int nr_test_fds; |
| @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) | |||
| 1151 | pkey_assert(i < NR_PKEYS*2); | 1167 | pkey_assert(i < NR_PKEYS*2); |
| 1152 | 1168 | ||
| 1153 | /* | 1169 | /* |
| 1154 | * There are 16 pkeys supported in hardware. One is taken | 1170 | * There are 16 pkeys supported in hardware. Three are |
| 1155 | * up for the default (0) and another can be taken up by | 1171 | * allocated by the time we get here: |
| 1156 | * an execute-only mapping. Ensure that we can allocate | 1172 | * 1. The default key (0) |
| 1157 | * at least 14 (16-2). | 1173 | * 2. One possibly consumed by an execute-only mapping. |
| 1174 | * 3. One allocated by the test code and passed in via | ||
| 1175 | * 'pkey' to this function. | ||
| 1176 | * Ensure that we can allocate at least another 13 (16-3). | ||
| 1158 | */ | 1177 | */ |
| 1159 | pkey_assert(i >= NR_PKEYS-2); | 1178 | pkey_assert(i >= NR_PKEYS-3); |
| 1160 | 1179 | ||
| 1161 | for (i = 0; i < nr_allocated_pkeys; i++) { | 1180 | for (i = 0; i < nr_allocated_pkeys; i++) { |
| 1162 | err = sys_pkey_free(allocated_pkeys[i]); | 1181 | err = sys_pkey_free(allocated_pkeys[i]); |
| @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) | |||
| 1165 | } | 1184 | } |
| 1166 | } | 1185 | } |
| 1167 | 1186 | ||
| 1187 | /* | ||
| 1188 | * pkey 0 is special. It is allocated by default, so you do not | ||
| 1189 | * have to call pkey_alloc() to use it first. Make sure that it | ||
| 1190 | * is usable. | ||
| 1191 | */ | ||
| 1192 | void test_mprotect_with_pkey_0(int *ptr, u16 pkey) | ||
| 1193 | { | ||
| 1194 | long size; | ||
| 1195 | int prot; | ||
| 1196 | |||
| 1197 | assert(pkey_last_malloc_record); | ||
| 1198 | size = pkey_last_malloc_record->size; | ||
| 1199 | /* | ||
| 1200 | * This is a bit of a hack. But mprotect() requires | ||
| 1201 | * huge-page-aligned sizes when operating on hugetlbfs. | ||
| 1202 | * So, make sure that we use something that's a multiple | ||
| 1203 | * of a huge page when we can. | ||
| 1204 | */ | ||
| 1205 | if (size >= HPAGE_SIZE) | ||
| 1206 | size = HPAGE_SIZE; | ||
| 1207 | prot = pkey_last_malloc_record->prot; | ||
| 1208 | |||
| 1209 | /* Use pkey 0 */ | ||
| 1210 | mprotect_pkey(ptr, size, prot, 0); | ||
| 1211 | |||
| 1212 | /* Make sure that we can set it back to the original pkey. */ | ||
| 1213 | mprotect_pkey(ptr, size, prot, pkey); | ||
| 1214 | } | ||
| 1215 | |||
| 1168 | void test_ptrace_of_child(int *ptr, u16 pkey) | 1216 | void test_ptrace_of_child(int *ptr, u16 pkey) |
| 1169 | { | 1217 | { |
| 1170 | __attribute__((__unused__)) int peek_result; | 1218 | __attribute__((__unused__)) int peek_result; |
| @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) | |||
| 1228 | pkey_assert(ret != -1); | 1276 | pkey_assert(ret != -1); |
| 1229 | /* Now access from the current task, and expect NO exception: */ | 1277 | /* Now access from the current task, and expect NO exception: */ |
| 1230 | peek_result = read_ptr(plain_ptr); | 1278 | peek_result = read_ptr(plain_ptr); |
| 1231 | do_not_expect_pk_fault(); | 1279 | do_not_expect_pk_fault("read plain pointer after ptrace"); |
| 1232 | 1280 | ||
| 1233 | ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); | 1281 | ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); |
| 1234 | pkey_assert(ret != -1); | 1282 | pkey_assert(ret != -1); |
| @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) | |||
| 1241 | free(plain_ptr_unaligned); | 1289 | free(plain_ptr_unaligned); |
| 1242 | } | 1290 | } |
| 1243 | 1291 | ||
| 1244 | void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | 1292 | void *get_pointer_to_instructions(void) |
| 1245 | { | 1293 | { |
| 1246 | void *p1; | 1294 | void *p1; |
| 1247 | int scratch; | ||
| 1248 | int ptr_contents; | ||
| 1249 | int ret; | ||
| 1250 | 1295 | ||
| 1251 | p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); | 1296 | p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); |
| 1252 | dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); | 1297 | dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); |
| @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | |||
| 1256 | /* Point 'p1' at the *second* page of the function: */ | 1301 | /* Point 'p1' at the *second* page of the function: */ |
| 1257 | p1 += PAGE_SIZE; | 1302 | p1 += PAGE_SIZE; |
| 1258 | 1303 | ||
| 1304 | /* | ||
| 1305 | * Try to ensure we fault this in on next touch to ensure | ||
| 1306 | * we get an instruction fault as opposed to a data one | ||
| 1307 | */ | ||
| 1259 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | 1308 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
| 1309 | |||
| 1310 | return p1; | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | ||
| 1314 | { | ||
| 1315 | void *p1; | ||
| 1316 | int scratch; | ||
| 1317 | int ptr_contents; | ||
| 1318 | int ret; | ||
| 1319 | |||
| 1320 | p1 = get_pointer_to_instructions(); | ||
| 1260 | lots_o_noops_around_write(&scratch); | 1321 | lots_o_noops_around_write(&scratch); |
| 1261 | ptr_contents = read_ptr(p1); | 1322 | ptr_contents = read_ptr(p1); |
| 1262 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | 1323 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); |
| @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | |||
| 1272 | */ | 1333 | */ |
| 1273 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | 1334 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
| 1274 | lots_o_noops_around_write(&scratch); | 1335 | lots_o_noops_around_write(&scratch); |
| 1275 | do_not_expect_pk_fault(); | 1336 | do_not_expect_pk_fault("executing on PROT_EXEC memory"); |
| 1276 | ptr_contents = read_ptr(p1); | 1337 | ptr_contents = read_ptr(p1); |
| 1277 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | 1338 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); |
| 1278 | expected_pk_fault(pkey); | 1339 | expected_pk_fault(pkey); |
| 1279 | } | 1340 | } |
| 1280 | 1341 | ||
| 1342 | void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) | ||
| 1343 | { | ||
| 1344 | void *p1; | ||
| 1345 | int scratch; | ||
| 1346 | int ptr_contents; | ||
| 1347 | int ret; | ||
| 1348 | |||
| 1349 | dprintf1("%s() start\n", __func__); | ||
| 1350 | |||
| 1351 | p1 = get_pointer_to_instructions(); | ||
| 1352 | lots_o_noops_around_write(&scratch); | ||
| 1353 | ptr_contents = read_ptr(p1); | ||
| 1354 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | ||
| 1355 | |||
| 1356 | /* Use a *normal* mprotect(), not mprotect_pkey(): */ | ||
| 1357 | ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); | ||
| 1358 | pkey_assert(!ret); | ||
| 1359 | |||
| 1360 | dprintf2("pkru: %x\n", rdpkru()); | ||
| 1361 | |||
| 1362 | /* Make sure this is an *instruction* fault */ | ||
| 1363 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | ||
| 1364 | lots_o_noops_around_write(&scratch); | ||
| 1365 | do_not_expect_pk_fault("executing on PROT_EXEC memory"); | ||
| 1366 | ptr_contents = read_ptr(p1); | ||
| 1367 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | ||
| 1368 | expected_pk_fault(UNKNOWN_PKEY); | ||
| 1369 | |||
| 1370 | /* | ||
| 1371 | * Put the memory back to non-PROT_EXEC. Should clear the | ||
| 1372 | * exec-only pkey off the VMA and allow it to be readable | ||
| 1373 | * again. Go to PROT_NONE first to check for a kernel bug | ||
| 1374 | * that did not clear the pkey when doing PROT_NONE. | ||
| 1375 | */ | ||
| 1376 | ret = mprotect(p1, PAGE_SIZE, PROT_NONE); | ||
| 1377 | pkey_assert(!ret); | ||
| 1378 | |||
| 1379 | ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); | ||
| 1380 | pkey_assert(!ret); | ||
| 1381 | ptr_contents = read_ptr(p1); | ||
| 1382 | do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); | ||
| 1383 | } | ||
| 1384 | |||
| 1281 | void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) | 1385 | void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) |
| 1282 | { | 1386 | { |
| 1283 | int size = PAGE_SIZE; | 1387 | int size = PAGE_SIZE; |
| @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { | |||
| 1302 | test_kernel_gup_of_access_disabled_region, | 1406 | test_kernel_gup_of_access_disabled_region, |
| 1303 | test_kernel_gup_write_to_write_disabled_region, | 1407 | test_kernel_gup_write_to_write_disabled_region, |
| 1304 | test_executing_on_unreadable_memory, | 1408 | test_executing_on_unreadable_memory, |
| 1409 | test_implicit_mprotect_exec_only_memory, | ||
| 1410 | test_mprotect_with_pkey_0, | ||
| 1305 | test_ptrace_of_child, | 1411 | test_ptrace_of_child, |
| 1306 | test_pkey_syscalls_on_non_allocated_pkey, | 1412 | test_pkey_syscalls_on_non_allocated_pkey, |
| 1307 | test_pkey_syscalls_bad_args, | 1413 | test_pkey_syscalls_bad_args, |
