diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-20 14:28:32 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-20 14:28:32 -0400 |
commit | 8a6bd2f40e96fb4d96749ab029c61f0df218b003 (patch) | |
tree | 33bbe31fad29d88c066f91fb577c92496f659122 | |
parent | b9aad92236391f681083fa4045083d5b846b59e0 (diff) | |
parent | acf46020012ccbca1172e9c7aeab399c950d9212 (diff) |
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
"An unfortunately larger set of fixes, but a large portion is
selftests:
- Fix the missing clusterid initialization for x2apic cluster
management which caused boot failures due to IPIs being sent to the
wrong cluster
- Drop TS_COMPAT when a 64bit executable is exec()'ed from a compat
task
- Wrap access to __supported_pte_mask in __startup_64() where a clang
build crashes at boot due to a non PC relative access being generated.
- Two fixes for 5 level paging fallout in the decompressor:
- Handle GOT correctly for paging_prepare() and
cleanup_trampoline()
- Fix the page table handling in cleanup_trampoline() to avoid
page table corruption.
- Stop special casing protection key 0 as this is inconsistent with
the manpage and also inconsistent with the allocation map handling.
- Override the protection key when moving away from PROT_EXEC to
prevent inaccessible memory.
- Fix and update the protection key selftests to address breakage and
to cover the above issue
- Add a MOV SS self test"
[ Part of the x86 fixes were in the earlier core pull due to dependencies ]
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
x86/apic/x2apic: Initialize cluster ID properly
x86/boot/compressed/64: Fix moving page table out of trampoline memory
x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
x86/pkeys: Do not special case protection key 0
x86/pkeys/selftests: Add a test for pkey 0
x86/pkeys/selftests: Save off 'prot' for allocations
x86/pkeys/selftests: Fix pointer math
x86/pkeys: Override pkey when moving away from PROT_EXEC
x86/pkeys/selftests: Fix pkey exhaustion test off-by-one
x86/pkeys/selftests: Add PROT_EXEC test
x86/pkeys/selftests: Factor out "instruction page"
x86/pkeys/selftests: Allow faults on unknown keys
x86/pkeys/selftests: Avoid printf-in-signal deadlocks
x86/pkeys/selftests: Remove dead debugging code, fix dprint_in_signal
x86/pkeys/selftests: Stop using assert()
x86/pkeys/selftests: Give better unexpected fault error messages
x86/selftests: Add mov_to_ss test
x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI
x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI
...
-rw-r--r-- | arch/x86/boot/compressed/head_64.S | 79 | ||||
-rw-r--r-- | arch/x86/boot/compressed/pgtable_64.c | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/mmu_context.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/pkeys.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/pkeys.c | 21 | ||||
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/x86/mov_ss_trap.c | 285 | ||||
-rw-r--r-- | tools/testing/selftests/x86/mpx-mini-test.c | 7 | ||||
-rw-r--r-- | tools/testing/selftests/x86/pkey-helpers.h | 20 | ||||
-rw-r--r-- | tools/testing/selftests/x86/protection_keys.c | 254 |
13 files changed, 585 insertions, 129 deletions
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fca012baba19..8169e8b7a4dc 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -306,6 +306,25 @@ ENTRY(startup_64) | |||
306 | leaq boot_stack_end(%rbx), %rsp | 306 | leaq boot_stack_end(%rbx), %rsp |
307 | 307 | ||
308 | /* | 308 | /* |
309 | * paging_prepare() and cleanup_trampoline() below can have GOT | ||
310 | * references. Adjust the table with address we are running at. | ||
311 | * | ||
312 | * Zero RAX for adjust_got: the GOT was not adjusted before; | ||
313 | * there's no adjustment to undo. | ||
314 | */ | ||
315 | xorq %rax, %rax | ||
316 | |||
317 | /* | ||
318 | * Calculate the address the binary is loaded at and use it as | ||
319 | * a GOT adjustment. | ||
320 | */ | ||
321 | call 1f | ||
322 | 1: popq %rdi | ||
323 | subq $1b, %rdi | ||
324 | |||
325 | call adjust_got | ||
326 | |||
327 | /* | ||
309 | * At this point we are in long mode with 4-level paging enabled, | 328 | * At this point we are in long mode with 4-level paging enabled, |
310 | * but we might want to enable 5-level paging or vice versa. | 329 | * but we might want to enable 5-level paging or vice versa. |
311 | * | 330 | * |
@@ -370,10 +389,14 @@ trampoline_return: | |||
370 | /* | 389 | /* |
371 | * cleanup_trampoline() would restore trampoline memory. | 390 | * cleanup_trampoline() would restore trampoline memory. |
372 | * | 391 | * |
392 | * RDI is address of the page table to use instead of page table | ||
393 | * in trampoline memory (if required). | ||
394 | * | ||
373 | * RSI holds real mode data and needs to be preserved across | 395 | * RSI holds real mode data and needs to be preserved across |
374 | * this function call. | 396 | * this function call. |
375 | */ | 397 | */ |
376 | pushq %rsi | 398 | pushq %rsi |
399 | leaq top_pgtable(%rbx), %rdi | ||
377 | call cleanup_trampoline | 400 | call cleanup_trampoline |
378 | popq %rsi | 401 | popq %rsi |
379 | 402 | ||
@@ -381,6 +404,21 @@ trampoline_return: | |||
381 | pushq $0 | 404 | pushq $0 |
382 | popfq | 405 | popfq |
383 | 406 | ||
407 | /* | ||
408 | * Previously we've adjusted the GOT with address the binary was | ||
409 | * loaded at. Now we need to re-adjust for relocation address. | ||
410 | * | ||
411 | * Calculate the address the binary is loaded at, so that we can | ||
412 | * undo the previous GOT adjustment. | ||
413 | */ | ||
414 | call 1f | ||
415 | 1: popq %rax | ||
416 | subq $1b, %rax | ||
417 | |||
418 | /* The new adjustment is the relocation address */ | ||
419 | movq %rbx, %rdi | ||
420 | call adjust_got | ||
421 | |||
384 | /* | 422 | /* |
385 | * Copy the compressed kernel to the end of our buffer | 423 | * Copy the compressed kernel to the end of our buffer |
386 | * where decompression in place becomes safe. | 424 | * where decompression in place becomes safe. |
@@ -482,19 +520,6 @@ relocated: | |||
482 | rep stosq | 520 | rep stosq |
483 | 521 | ||
484 | /* | 522 | /* |
485 | * Adjust our own GOT | ||
486 | */ | ||
487 | leaq _got(%rip), %rdx | ||
488 | leaq _egot(%rip), %rcx | ||
489 | 1: | ||
490 | cmpq %rcx, %rdx | ||
491 | jae 2f | ||
492 | addq %rbx, (%rdx) | ||
493 | addq $8, %rdx | ||
494 | jmp 1b | ||
495 | 2: | ||
496 | |||
497 | /* | ||
498 | * Do the extraction, and jump to the new kernel.. | 523 | * Do the extraction, and jump to the new kernel.. |
499 | */ | 524 | */ |
500 | pushq %rsi /* Save the real mode argument */ | 525 | pushq %rsi /* Save the real mode argument */ |
@@ -512,6 +537,27 @@ relocated: | |||
512 | */ | 537 | */ |
513 | jmp *%rax | 538 | jmp *%rax |
514 | 539 | ||
540 | /* | ||
541 | * Adjust the global offset table | ||
542 | * | ||
543 | * RAX is the previous adjustment of the table to undo (use 0 if it's the | ||
544 | * first time we touch GOT). | ||
545 | * RDI is the new adjustment to apply. | ||
546 | */ | ||
547 | adjust_got: | ||
548 | /* Walk through the GOT adding the address to the entries */ | ||
549 | leaq _got(%rip), %rdx | ||
550 | leaq _egot(%rip), %rcx | ||
551 | 1: | ||
552 | cmpq %rcx, %rdx | ||
553 | jae 2f | ||
554 | subq %rax, (%rdx) /* Undo previous adjustment */ | ||
555 | addq %rdi, (%rdx) /* Apply the new adjustment */ | ||
556 | addq $8, %rdx | ||
557 | jmp 1b | ||
558 | 2: | ||
559 | ret | ||
560 | |||
515 | .code32 | 561 | .code32 |
516 | /* | 562 | /* |
517 | * This is the 32-bit trampoline that will be copied over to low memory. | 563 | * This is the 32-bit trampoline that will be copied over to low memory. |
@@ -649,3 +695,10 @@ boot_stack_end: | |||
649 | .balign 4096 | 695 | .balign 4096 |
650 | pgtable: | 696 | pgtable: |
651 | .fill BOOT_PGT_SIZE, 1, 0 | 697 | .fill BOOT_PGT_SIZE, 1, 0 |
698 | |||
699 | /* | ||
700 | * The page table is going to be used instead of page table in the trampoline | ||
701 | * memory. | ||
702 | */ | ||
703 | top_pgtable: | ||
704 | .fill PAGE_SIZE, 1, 0 | ||
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 32af1cbcd903..a362fa0b849c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c | |||
@@ -23,14 +23,6 @@ struct paging_config { | |||
23 | static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; | 23 | static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * The page table is going to be used instead of page table in the trampoline | ||
27 | * memory. | ||
28 | * | ||
29 | * It must not be in BSS as BSS is cleared after cleanup_trampoline(). | ||
30 | */ | ||
31 | static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); | ||
32 | |||
33 | /* | ||
34 | * Trampoline address will be printed by extract_kernel() for debugging | 26 | * Trampoline address will be printed by extract_kernel() for debugging |
35 | * purposes. | 27 | * purposes. |
36 | * | 28 | * |
@@ -134,7 +126,7 @@ out: | |||
134 | return paging_config; | 126 | return paging_config; |
135 | } | 127 | } |
136 | 128 | ||
137 | void cleanup_trampoline(void) | 129 | void cleanup_trampoline(void *pgtable) |
138 | { | 130 | { |
139 | void *trampoline_pgtable; | 131 | void *trampoline_pgtable; |
140 | 132 | ||
@@ -145,8 +137,8 @@ void cleanup_trampoline(void) | |||
145 | * if it's there. | 137 | * if it's there. |
146 | */ | 138 | */ |
147 | if ((void *)__native_read_cr3() == trampoline_pgtable) { | 139 | if ((void *)__native_read_cr3() == trampoline_pgtable) { |
148 | memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); | 140 | memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); |
149 | native_write_cr3((unsigned long)top_pgtable); | 141 | native_write_cr3((unsigned long)pgtable); |
150 | } | 142 | } |
151 | 143 | ||
152 | /* Restore trampoline memory */ | 144 | /* Restore trampoline memory */ |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 57e3785d0d26..cf9911b5a53c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk, | |||
193 | 193 | ||
194 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS | 194 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS |
195 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { | 195 | if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { |
196 | /* pkey 0 is the default and always allocated */ | 196 | /* pkey 0 is the default and allocated implicitly */ |
197 | mm->context.pkey_allocation_map = 0x1; | 197 | mm->context.pkey_allocation_map = 0x1; |
198 | /* -1 means unallocated or invalid */ | 198 | /* -1 means unallocated or invalid */ |
199 | mm->context.execute_only_pkey = -1; | 199 | mm->context.execute_only_pkey = -1; |
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..851c04b7a092 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #ifndef _ASM_X86_PKEYS_H | 2 | #ifndef _ASM_X86_PKEYS_H |
3 | #define _ASM_X86_PKEYS_H | 3 | #define _ASM_X86_PKEYS_H |
4 | 4 | ||
5 | #define ARCH_DEFAULT_PKEY 0 | ||
6 | |||
5 | #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) | 7 | #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) |
6 | 8 | ||
7 | extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | 9 | extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, |
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); | |||
15 | static inline int execute_only_pkey(struct mm_struct *mm) | 17 | static inline int execute_only_pkey(struct mm_struct *mm) |
16 | { | 18 | { |
17 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 19 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
18 | return 0; | 20 | return ARCH_DEFAULT_PKEY; |
19 | 21 | ||
20 | return __execute_only_pkey(mm); | 22 | return __execute_only_pkey(mm); |
21 | } | 23 | } |
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) | |||
49 | { | 51 | { |
50 | /* | 52 | /* |
51 | * "Allocated" pkeys are those that have been returned | 53 | * "Allocated" pkeys are those that have been returned |
52 | * from pkey_alloc(). pkey 0 is special, and never | 54 | * from pkey_alloc() or pkey 0 which is allocated |
53 | * returned from pkey_alloc(). | 55 | * implicitly when the mm is created. |
54 | */ | 56 | */ |
55 | if (pkey <= 0) | 57 | if (pkey < 0) |
56 | return false; | 58 | return false; |
57 | if (pkey >= arch_max_pkey()) | 59 | if (pkey >= arch_max_pkey()) |
58 | return false; | 60 | return false; |
61 | /* | ||
62 | * The exec-only pkey is set in the allocation map, but | ||
63 | * is not available to any of the user interfaces like | ||
64 | * mprotect_pkey(). | ||
65 | */ | ||
66 | if (pkey == mm->context.execute_only_pkey) | ||
67 | return false; | ||
68 | |||
59 | return mm_pkey_allocation_map(mm) & (1U << pkey); | 69 | return mm_pkey_allocation_map(mm) & (1U << pkey); |
60 | } | 70 | } |
61 | 71 | ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8b04234e010b..7685444a106b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void) | |||
116 | goto update; | 116 | goto update; |
117 | } | 117 | } |
118 | cmsk = cluster_hotplug_mask; | 118 | cmsk = cluster_hotplug_mask; |
119 | cmsk->clusterid = cluster; | ||
119 | cluster_hotplug_mask = NULL; | 120 | cluster_hotplug_mask = NULL; |
120 | update: | 121 | update: |
121 | this_cpu_write(cluster_masks, cmsk); | 122 | this_cpu_write(cluster_masks, cmsk); |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0c408f8c4ed4..2d29e47c056e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr) | |||
104 | } | 104 | } |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | /* Code in __startup_64() can be relocated during execution, but the compiler | ||
108 | * doesn't have to generate PC-relative relocations when accessing globals from | ||
109 | * that function. Clang actually does not generate them, which leads to | ||
110 | * boot-time crashes. To work around this problem, every global pointer must | ||
111 | * be adjusted using fixup_pointer(). | ||
112 | */ | ||
107 | unsigned long __head __startup_64(unsigned long physaddr, | 113 | unsigned long __head __startup_64(unsigned long physaddr, |
108 | struct boot_params *bp) | 114 | struct boot_params *bp) |
109 | { | 115 | { |
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
113 | p4dval_t *p4d; | 119 | p4dval_t *p4d; |
114 | pudval_t *pud; | 120 | pudval_t *pud; |
115 | pmdval_t *pmd, pmd_entry; | 121 | pmdval_t *pmd, pmd_entry; |
122 | pteval_t *mask_ptr; | ||
116 | bool la57; | 123 | bool la57; |
117 | int i; | 124 | int i; |
118 | unsigned int *next_pgt_ptr; | 125 | unsigned int *next_pgt_ptr; |
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
196 | 203 | ||
197 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; | 204 | pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; |
198 | /* Filter out unsupported __PAGE_KERNEL_* bits: */ | 205 | /* Filter out unsupported __PAGE_KERNEL_* bits: */ |
199 | pmd_entry &= __supported_pte_mask; | 206 | mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); |
207 | pmd_entry &= *mask_ptr; | ||
200 | pmd_entry += sme_get_me_mask(); | 208 | pmd_entry += sme_get_me_mask(); |
201 | pmd_entry += physaddr; | 209 | pmd_entry += physaddr; |
202 | 210 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4b100fe0f508..12bb445fb98d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -542,6 +542,7 @@ void set_personality_64bit(void) | |||
542 | clear_thread_flag(TIF_X32); | 542 | clear_thread_flag(TIF_X32); |
543 | /* Pretend that this comes from a 64bit execve */ | 543 | /* Pretend that this comes from a 64bit execve */ |
544 | task_pt_regs(current)->orig_ax = __NR_execve; | 544 | task_pt_regs(current)->orig_ax = __NR_execve; |
545 | current_thread_info()->status &= ~TS_COMPAT; | ||
545 | 546 | ||
546 | /* Ensure the corresponding mm is not marked. */ | 547 | /* Ensure the corresponding mm is not marked. */ |
547 | if (current->mm) | 548 | if (current->mm) |
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index d7bc0eea20a5..6e98e0a7c923 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c | |||
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey | |||
94 | */ | 94 | */ |
95 | if (pkey != -1) | 95 | if (pkey != -1) |
96 | return pkey; | 96 | return pkey; |
97 | /* | 97 | |
98 | * Look for a protection-key-drive execute-only mapping | ||
99 | * which is now being given permissions that are not | ||
100 | * execute-only. Move it back to the default pkey. | ||
101 | */ | ||
102 | if (vma_is_pkey_exec_only(vma) && | ||
103 | (prot & (PROT_READ|PROT_WRITE))) { | ||
104 | return 0; | ||
105 | } | ||
106 | /* | 98 | /* |
107 | * The mapping is execute-only. Go try to get the | 99 | * The mapping is execute-only. Go try to get the |
108 | * execute-only protection key. If we fail to do that, | 100 | * execute-only protection key. If we fail to do that, |
109 | * fall through as if we do not have execute-only | 101 | * fall through as if we do not have execute-only |
110 | * support. | 102 | * support in this mm. |
111 | */ | 103 | */ |
112 | if (prot == PROT_EXEC) { | 104 | if (prot == PROT_EXEC) { |
113 | pkey = execute_only_pkey(vma->vm_mm); | 105 | pkey = execute_only_pkey(vma->vm_mm); |
114 | if (pkey > 0) | 106 | if (pkey > 0) |
115 | return pkey; | 107 | return pkey; |
108 | } else if (vma_is_pkey_exec_only(vma)) { | ||
109 | /* | ||
110 | * Protections are *not* PROT_EXEC, but the mapping | ||
111 | * is using the exec-only pkey. This mapping was | ||
112 | * PROT_EXEC and will no longer be. Move back to | ||
113 | * the default pkey. | ||
114 | */ | ||
115 | return ARCH_DEFAULT_PKEY; | ||
116 | } | 116 | } |
117 | |||
117 | /* | 118 | /* |
118 | * This is a vanilla, non-pkey mprotect (or we failed to | 119 | * This is a vanilla, non-pkey mprotect (or we failed to |
119 | * setup execute-only), inherit the pkey from the VMA we | 120 | * setup execute-only), inherit the pkey from the VMA we |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) | |||
11 | 11 | ||
12 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ | 12 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ |
13 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ | 13 | check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ |
14 | protection_keys test_vdso test_vsyscall | 14 | protection_keys test_vdso test_vsyscall mov_ss_trap |
15 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ | 15 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ |
16 | test_FCMOV test_FCOMI test_FISTTP \ | 16 | test_FCMOV test_FCOMI test_FISTTP \ |
17 | vdso_restorer | 17 | vdso_restorer |
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS | ||
4 | * | ||
5 | * This does MOV SS from a watchpointed address followed by various | ||
6 | * types of kernel entries. A MOV SS that hits a watchpoint will queue | ||
7 | * up a #DB trap but will not actually deliver that trap. The trap | ||
8 | * will be delivered after the next instruction instead. The CPU's logic | ||
9 | * seems to be: | ||
10 | * | ||
11 | * - Any fault: drop the pending #DB trap. | ||
12 | * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then | ||
13 | * deliver #DB. | ||
14 | * - ICEBP: enter the kernel but do not deliver the watchpoint trap | ||
15 | * - breakpoint: only one #DB is delivered (phew!) | ||
16 | * | ||
17 | * There are plenty of ways for a kernel to handle this incorrectly. This | ||
18 | * test tries to exercise all the cases. | ||
19 | * | ||
20 | * This should mostly cover CVE-2018-1087 and CVE-2018-8897. | ||
21 | */ | ||
22 | #define _GNU_SOURCE | ||
23 | |||
24 | #include <stdlib.h> | ||
25 | #include <sys/ptrace.h> | ||
26 | #include <sys/types.h> | ||
27 | #include <sys/wait.h> | ||
28 | #include <sys/user.h> | ||
29 | #include <sys/syscall.h> | ||
30 | #include <unistd.h> | ||
31 | #include <errno.h> | ||
32 | #include <stddef.h> | ||
33 | #include <stdio.h> | ||
34 | #include <err.h> | ||
35 | #include <string.h> | ||
36 | #include <setjmp.h> | ||
37 | #include <sys/prctl.h> | ||
38 | |||
39 | #define X86_EFLAGS_RF (1UL << 16) | ||
40 | |||
41 | #if __x86_64__ | ||
42 | # define REG_IP REG_RIP | ||
43 | #else | ||
44 | # define REG_IP REG_EIP | ||
45 | #endif | ||
46 | |||
47 | unsigned short ss; | ||
48 | extern unsigned char breakpoint_insn[]; | ||
49 | sigjmp_buf jmpbuf; | ||
50 | static unsigned char altstack_data[SIGSTKSZ]; | ||
51 | |||
52 | static void enable_watchpoint(void) | ||
53 | { | ||
54 | pid_t parent = getpid(); | ||
55 | int status; | ||
56 | |||
57 | pid_t child = fork(); | ||
58 | if (child < 0) | ||
59 | err(1, "fork"); | ||
60 | |||
61 | if (child) { | ||
62 | if (waitpid(child, &status, 0) != child) | ||
63 | err(1, "waitpid for child"); | ||
64 | } else { | ||
65 | unsigned long dr0, dr1, dr7; | ||
66 | |||
67 | dr0 = (unsigned long)&ss; | ||
68 | dr1 = (unsigned long)breakpoint_insn; | ||
69 | dr7 = ((1UL << 1) | /* G0 */ | ||
70 | (3UL << 16) | /* RW0 = read or write */ | ||
71 | (1UL << 18) | /* LEN0 = 2 bytes */ | ||
72 | (1UL << 3)); /* G1, RW1 = insn */ | ||
73 | |||
74 | if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) | ||
75 | err(1, "PTRACE_ATTACH"); | ||
76 | |||
77 | if (waitpid(parent, &status, 0) != parent) | ||
78 | err(1, "waitpid for child"); | ||
79 | |||
80 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) | ||
81 | err(1, "PTRACE_POKEUSER DR0"); | ||
82 | |||
83 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) | ||
84 | err(1, "PTRACE_POKEUSER DR1"); | ||
85 | |||
86 | if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) | ||
87 | err(1, "PTRACE_POKEUSER DR7"); | ||
88 | |||
89 | printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); | ||
90 | |||
91 | if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) | ||
92 | err(1, "PTRACE_DETACH"); | ||
93 | |||
94 | exit(0); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | ||
99 | int flags) | ||
100 | { | ||
101 | struct sigaction sa; | ||
102 | memset(&sa, 0, sizeof(sa)); | ||
103 | sa.sa_sigaction = handler; | ||
104 | sa.sa_flags = SA_SIGINFO | flags; | ||
105 | sigemptyset(&sa.sa_mask); | ||
106 | if (sigaction(sig, &sa, 0)) | ||
107 | err(1, "sigaction"); | ||
108 | } | ||
109 | |||
110 | static char const * const signames[] = { | ||
111 | [SIGSEGV] = "SIGSEGV", | ||
112 | [SIGBUS] = "SIBGUS", | ||
113 | [SIGTRAP] = "SIGTRAP", | ||
114 | [SIGILL] = "SIGILL", | ||
115 | }; | ||
116 | |||
117 | static void sigtrap(int sig, siginfo_t *si, void *ctx_void) | ||
118 | { | ||
119 | ucontext_t *ctx = ctx_void; | ||
120 | |||
121 | printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", | ||
122 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP], | ||
123 | !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); | ||
124 | } | ||
125 | |||
126 | static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) | ||
127 | { | ||
128 | ucontext_t *ctx = ctx_void; | ||
129 | |||
130 | printf("\tGot %s with RIP=%lx\n", signames[sig], | ||
131 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); | ||
132 | } | ||
133 | |||
134 | static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) | ||
135 | { | ||
136 | ucontext_t *ctx = ctx_void; | ||
137 | |||
138 | printf("\tGot %s with RIP=%lx\n", signames[sig], | ||
139 | (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); | ||
140 | |||
141 | siglongjmp(jmpbuf, 1); | ||
142 | } | ||
143 | |||
144 | int main() | ||
145 | { | ||
146 | unsigned long nr; | ||
147 | |||
148 | asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); | ||
149 | printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); | ||
150 | |||
151 | if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) | ||
152 | printf("\tPR_SET_PTRACER_ANY succeeded\n"); | ||
153 | |||
154 | printf("\tSet up a watchpoint\n"); | ||
155 | sethandler(SIGTRAP, sigtrap, 0); | ||
156 | enable_watchpoint(); | ||
157 | |||
158 | printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); | ||
159 | asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); | ||
160 | |||
161 | printf("[RUN]\tMOV SS; INT3\n"); | ||
162 | asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); | ||
163 | |||
164 | printf("[RUN]\tMOV SS; INT 3\n"); | ||
165 | asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); | ||
166 | |||
167 | printf("[RUN]\tMOV SS; CS CS INT3\n"); | ||
168 | asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); | ||
169 | |||
170 | printf("[RUN]\tMOV SS; CSx14 INT3\n"); | ||
171 | asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); | ||
172 | |||
173 | printf("[RUN]\tMOV SS; INT 4\n"); | ||
174 | sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); | ||
175 | asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); | ||
176 | |||
177 | #ifdef __i386__ | ||
178 | printf("[RUN]\tMOV SS; INTO\n"); | ||
179 | sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); | ||
180 | nr = -1; | ||
181 | asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" | ||
182 | : [tmp] "+r" (nr) : [ss] "m" (ss)); | ||
183 | #endif | ||
184 | |||
185 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
186 | printf("[RUN]\tMOV SS; ICEBP\n"); | ||
187 | |||
188 | /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ | ||
189 | sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); | ||
190 | |||
191 | asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); | ||
192 | } | ||
193 | |||
194 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
195 | printf("[RUN]\tMOV SS; CLI\n"); | ||
196 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
197 | asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); | ||
198 | } | ||
199 | |||
200 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
201 | printf("[RUN]\tMOV SS; #PF\n"); | ||
202 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
203 | asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" | ||
204 | : [tmp] "=r" (nr) : [ss] "m" (ss)); | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * INT $1: if #DB has DPL=3 and there isn't special handling, | ||
209 | * then the kernel will die. | ||
210 | */ | ||
211 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
212 | printf("[RUN]\tMOV SS; INT 1\n"); | ||
213 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
214 | asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); | ||
215 | } | ||
216 | |||
217 | #ifdef __x86_64__ | ||
218 | /* | ||
219 | * In principle, we should test 32-bit SYSCALL as well, but | ||
220 | * the calling convention is so unpredictable that it's | ||
221 | * not obviously worth the effort. | ||
222 | */ | ||
223 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
224 | printf("[RUN]\tMOV SS; SYSCALL\n"); | ||
225 | sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); | ||
226 | nr = SYS_getpid; | ||
227 | /* | ||
228 | * Toggle the high bit of RSP to make it noncanonical to | ||
229 | * strengthen this test on non-SMAP systems. | ||
230 | */ | ||
231 | asm volatile ("btc $63, %%rsp\n\t" | ||
232 | "mov %[ss], %%ss; syscall\n\t" | ||
233 | "btc $63, %%rsp" | ||
234 | : "+a" (nr) : [ss] "m" (ss) | ||
235 | : "rcx" | ||
236 | #ifdef __x86_64__ | ||
237 | , "r11" | ||
238 | #endif | ||
239 | ); | ||
240 | } | ||
241 | #endif | ||
242 | |||
243 | printf("[RUN]\tMOV SS; breakpointed NOP\n"); | ||
244 | asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); | ||
245 | |||
246 | /* | ||
247 | * Invoking SYSENTER directly breaks all the rules. Just handle | ||
248 | * the SIGSEGV. | ||
249 | */ | ||
250 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
251 | printf("[RUN]\tMOV SS; SYSENTER\n"); | ||
252 | stack_t stack = { | ||
253 | .ss_sp = altstack_data, | ||
254 | .ss_size = SIGSTKSZ, | ||
255 | }; | ||
256 | if (sigaltstack(&stack, NULL) != 0) | ||
257 | err(1, "sigaltstack"); | ||
258 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); | ||
259 | nr = SYS_getpid; | ||
260 | asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) | ||
261 | : [ss] "m" (ss) : "flags", "rcx" | ||
262 | #ifdef __x86_64__ | ||
263 | , "r11" | ||
264 | #endif | ||
265 | ); | ||
266 | |||
267 | /* We're unreachable here. SYSENTER forgets RIP. */ | ||
268 | } | ||
269 | |||
270 | if (sigsetjmp(jmpbuf, 1) == 0) { | ||
271 | printf("[RUN]\tMOV SS; INT $0x80\n"); | ||
272 | sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); | ||
273 | nr = 20; /* compat getpid */ | ||
274 | asm volatile ("mov %[ss], %%ss; int $0x80" | ||
275 | : "+a" (nr) : [ss] "m" (ss) | ||
276 | : "flags" | ||
277 | #ifdef __x86_64__ | ||
278 | , "r8", "r9", "r10", "r11" | ||
279 | #endif | ||
280 | ); | ||
281 | } | ||
282 | |||
283 | printf("[OK]\tI aten't dead\n"); | ||
284 | return 0; | ||
285 | } | ||
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c | |||
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1; | |||
368 | uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ | 368 | uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ |
369 | unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; | 369 | unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; |
370 | 370 | ||
371 | /* Failed address bound checks: */ | ||
372 | #ifndef SEGV_BNDERR | ||
373 | # define SEGV_BNDERR 3 | ||
374 | #endif | ||
375 | |||
371 | /* | 376 | /* |
372 | * The kernel is supposed to provide some information about the bounds | 377 | * The kernel is supposed to provide some information about the bounds |
373 | * exception in the siginfo. It should match what we have in the bounds | 378 | * exception in the siginfo. It should match what we have in the bounds |
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) | |||
419 | br_count++; | 424 | br_count++; |
420 | dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); | 425 | dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); |
421 | 426 | ||
422 | #define SEGV_BNDERR 3 /* failed address bound checks */ | ||
423 | |||
424 | dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", | 427 | dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", |
425 | status, ip, br_reason); | 428 | status, ip, br_reason); |
426 | dprintf2("si_signo: %d\n", si->si_signo); | 429 | dprintf2("si_signo: %d\n", si->si_signo); |
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h | |||
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) | |||
26 | { | 26 | { |
27 | va_list ap; | 27 | va_list ap; |
28 | 28 | ||
29 | va_start(ap, format); | ||
30 | if (!dprint_in_signal) { | 29 | if (!dprint_in_signal) { |
30 | va_start(ap, format); | ||
31 | vprintf(format, ap); | 31 | vprintf(format, ap); |
32 | va_end(ap); | ||
32 | } else { | 33 | } else { |
33 | int ret; | 34 | int ret; |
34 | int len = vsnprintf(dprint_in_signal_buffer, | ||
35 | DPRINT_IN_SIGNAL_BUF_SIZE, | ||
36 | format, ap); | ||
37 | /* | 35 | /* |
38 | * len is amount that would have been printed, | 36 | * No printf() functions are signal-safe. |
39 | * but actual write is truncated at BUF_SIZE. | 37 | * They deadlock easily. Write the format |
38 | * string to get some output, even if | ||
39 | * incomplete. | ||
40 | */ | 40 | */ |
41 | if (len > DPRINT_IN_SIGNAL_BUF_SIZE) | 41 | ret = write(1, format, strlen(format)); |
42 | len = DPRINT_IN_SIGNAL_BUF_SIZE; | ||
43 | ret = write(1, dprint_in_signal_buffer, len); | ||
44 | if (ret < 0) | 42 | if (ret < 0) |
45 | abort(); | 43 | exit(1); |
46 | } | 44 | } |
47 | va_end(ap); | ||
48 | } | 45 | } |
49 | #define dprintf_level(level, args...) do { \ | 46 | #define dprintf_level(level, args...) do { \ |
50 | if (level <= DEBUG_LEVEL) \ | 47 | if (level <= DEBUG_LEVEL) \ |
51 | sigsafe_printf(args); \ | 48 | sigsafe_printf(args); \ |
52 | fflush(NULL); \ | ||
53 | } while (0) | 49 | } while (0) |
54 | #define dprintf0(args...) dprintf_level(0, args) | 50 | #define dprintf0(args...) dprintf_level(0, args) |
55 | #define dprintf1(args...) dprintf_level(1, args) | 51 | #define dprintf1(args...) dprintf_level(1, args) |
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c | |||
@@ -72,10 +72,9 @@ extern void abort_hooks(void); | |||
72 | test_nr, iteration_nr); \ | 72 | test_nr, iteration_nr); \ |
73 | dprintf0("errno at assert: %d", errno); \ | 73 | dprintf0("errno at assert: %d", errno); \ |
74 | abort_hooks(); \ | 74 | abort_hooks(); \ |
75 | assert(condition); \ | 75 | exit(__LINE__); \ |
76 | } \ | 76 | } \ |
77 | } while (0) | 77 | } while (0) |
78 | #define raw_assert(cond) assert(cond) | ||
79 | 78 | ||
80 | void cat_into_file(char *str, char *file) | 79 | void cat_into_file(char *str, char *file) |
81 | { | 80 | { |
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) | |||
87 | * these need to be raw because they are called under | 86 | * these need to be raw because they are called under |
88 | * pkey_assert() | 87 | * pkey_assert() |
89 | */ | 88 | */ |
90 | raw_assert(fd >= 0); | 89 | if (fd < 0) { |
90 | fprintf(stderr, "error opening '%s'\n", str); | ||
91 | perror("error: "); | ||
92 | exit(__LINE__); | ||
93 | } | ||
94 | |||
91 | ret = write(fd, str, strlen(str)); | 95 | ret = write(fd, str, strlen(str)); |
92 | if (ret != strlen(str)) { | 96 | if (ret != strlen(str)) { |
93 | perror("write to file failed"); | 97 | perror("write to file failed"); |
94 | fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); | 98 | fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); |
95 | raw_assert(0); | 99 | exit(__LINE__); |
96 | } | 100 | } |
97 | close(fd); | 101 | close(fd); |
98 | } | 102 | } |
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) | |||
191 | #ifdef __i386__ | 195 | #ifdef __i386__ |
192 | 196 | ||
193 | #ifndef SYS_mprotect_key | 197 | #ifndef SYS_mprotect_key |
194 | # define SYS_mprotect_key 380 | 198 | # define SYS_mprotect_key 380 |
195 | #endif | 199 | #endif |
200 | |||
196 | #ifndef SYS_pkey_alloc | 201 | #ifndef SYS_pkey_alloc |
197 | # define SYS_pkey_alloc 381 | 202 | # define SYS_pkey_alloc 381 |
198 | # define SYS_pkey_free 382 | 203 | # define SYS_pkey_free 382 |
199 | #endif | 204 | #endif |
200 | #define REG_IP_IDX REG_EIP | 205 | |
201 | #define si_pkey_offset 0x14 | 206 | #define REG_IP_IDX REG_EIP |
207 | #define si_pkey_offset 0x14 | ||
202 | 208 | ||
203 | #else | 209 | #else |
204 | 210 | ||
205 | #ifndef SYS_mprotect_key | 211 | #ifndef SYS_mprotect_key |
206 | # define SYS_mprotect_key 329 | 212 | # define SYS_mprotect_key 329 |
207 | #endif | 213 | #endif |
214 | |||
208 | #ifndef SYS_pkey_alloc | 215 | #ifndef SYS_pkey_alloc |
209 | # define SYS_pkey_alloc 330 | 216 | # define SYS_pkey_alloc 330 |
210 | # define SYS_pkey_free 331 | 217 | # define SYS_pkey_free 331 |
211 | #endif | 218 | #endif |
212 | #define REG_IP_IDX REG_RIP | 219 | |
213 | #define si_pkey_offset 0x20 | 220 | #define REG_IP_IDX REG_RIP |
221 | #define si_pkey_offset 0x20 | ||
214 | 222 | ||
215 | #endif | 223 | #endif |
216 | 224 | ||
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) | |||
225 | } | 233 | } |
226 | } | 234 | } |
227 | 235 | ||
228 | #define SEGV_BNDERR 3 /* failed address bound checks */ | 236 | /* Failed address bound checks: */ |
229 | #define SEGV_PKUERR 4 | 237 | #ifndef SEGV_BNDERR |
238 | # define SEGV_BNDERR 3 | ||
239 | #endif | ||
240 | |||
241 | #ifndef SEGV_PKUERR | ||
242 | # define SEGV_PKUERR 4 | ||
243 | #endif | ||
230 | 244 | ||
231 | static char *si_code_str(int si_code) | 245 | static char *si_code_str(int si_code) |
232 | { | 246 | { |
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
289 | dump_mem(pkru_ptr - 128, 256); | 303 | dump_mem(pkru_ptr - 128, 256); |
290 | pkey_assert(*pkru_ptr); | 304 | pkey_assert(*pkru_ptr); |
291 | 305 | ||
292 | si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); | ||
293 | dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); | ||
294 | dump_mem(si_pkey_ptr - 8, 24); | ||
295 | siginfo_pkey = *si_pkey_ptr; | ||
296 | pkey_assert(siginfo_pkey < NR_PKEYS); | ||
297 | last_si_pkey = siginfo_pkey; | ||
298 | |||
299 | if ((si->si_code == SEGV_MAPERR) || | 306 | if ((si->si_code == SEGV_MAPERR) || |
300 | (si->si_code == SEGV_ACCERR) || | 307 | (si->si_code == SEGV_ACCERR) || |
301 | (si->si_code == SEGV_BNDERR)) { | 308 | (si->si_code == SEGV_BNDERR)) { |
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
303 | exit(4); | 310 | exit(4); |
304 | } | 311 | } |
305 | 312 | ||
313 | si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); | ||
314 | dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); | ||
315 | dump_mem((u8 *)si_pkey_ptr - 8, 24); | ||
316 | siginfo_pkey = *si_pkey_ptr; | ||
317 | pkey_assert(siginfo_pkey < NR_PKEYS); | ||
318 | last_si_pkey = siginfo_pkey; | ||
319 | |||
306 | dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); | 320 | dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); |
307 | /* need __rdpkru() version so we do not do shadow_pkru checking */ | 321 | /* need __rdpkru() version so we do not do shadow_pkru checking */ |
308 | dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); | 322 | dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); |
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) | |||
311 | dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); | 325 | dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); |
312 | pkru_faults++; | 326 | pkru_faults++; |
313 | dprintf1("<<<<==================================================\n"); | 327 | dprintf1("<<<<==================================================\n"); |
314 | return; | ||
315 | if (trapno == 14) { | ||
316 | fprintf(stderr, | ||
317 | "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", | ||
318 | trapno, ip); | ||
319 | fprintf(stderr, "si_addr %p\n", si->si_addr); | ||
320 | fprintf(stderr, "REG_ERR: %lx\n", | ||
321 | (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); | ||
322 | exit(1); | ||
323 | } else { | ||
324 | fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); | ||
325 | fprintf(stderr, "si_addr %p\n", si->si_addr); | ||
326 | fprintf(stderr, "REG_ERR: %lx\n", | ||
327 | (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); | ||
328 | exit(2); | ||
329 | } | ||
330 | dprint_in_signal = 0; | 328 | dprint_in_signal = 0; |
331 | } | 329 | } |
332 | 330 | ||
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) | |||
393 | return forkret; | 391 | return forkret; |
394 | } | 392 | } |
395 | 393 | ||
396 | #define PKEY_DISABLE_ACCESS 0x1 | 394 | #ifndef PKEY_DISABLE_ACCESS |
397 | #define PKEY_DISABLE_WRITE 0x2 | 395 | # define PKEY_DISABLE_ACCESS 0x1 |
396 | #endif | ||
397 | |||
398 | #ifndef PKEY_DISABLE_WRITE | ||
399 | # define PKEY_DISABLE_WRITE 0x2 | ||
400 | #endif | ||
398 | 401 | ||
399 | u32 pkey_get(int pkey, unsigned long flags) | 402 | static u32 hw_pkey_get(int pkey, unsigned long flags) |
400 | { | 403 | { |
401 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); | 404 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); |
402 | u32 pkru = __rdpkru(); | 405 | u32 pkru = __rdpkru(); |
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) | |||
418 | return masked_pkru; | 421 | return masked_pkru; |
419 | } | 422 | } |
420 | 423 | ||
421 | int pkey_set(int pkey, unsigned long rights, unsigned long flags) | 424 | static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) |
422 | { | 425 | { |
423 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); | 426 | u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); |
424 | u32 old_pkru = __rdpkru(); | 427 | u32 old_pkru = __rdpkru(); |
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) | |||
452 | pkey, flags); | 455 | pkey, flags); |
453 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); | 456 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
454 | 457 | ||
455 | pkey_rights = pkey_get(pkey, syscall_flags); | 458 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
456 | 459 | ||
457 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 460 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
458 | pkey, pkey, pkey_rights); | 461 | pkey, pkey, pkey_rights); |
459 | pkey_assert(pkey_rights >= 0); | 462 | pkey_assert(pkey_rights >= 0); |
460 | 463 | ||
461 | pkey_rights |= flags; | 464 | pkey_rights |= flags; |
462 | 465 | ||
463 | ret = pkey_set(pkey, pkey_rights, syscall_flags); | 466 | ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); |
464 | assert(!ret); | 467 | assert(!ret); |
465 | /*pkru and flags have the same format */ | 468 | /*pkru and flags have the same format */ |
466 | shadow_pkru |= flags << (pkey * 2); | 469 | shadow_pkru |= flags << (pkey * 2); |
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) | |||
468 | 471 | ||
469 | pkey_assert(ret >= 0); | 472 | pkey_assert(ret >= 0); |
470 | 473 | ||
471 | pkey_rights = pkey_get(pkey, syscall_flags); | 474 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
472 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 475 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
473 | pkey, pkey, pkey_rights); | 476 | pkey, pkey, pkey_rights); |
474 | 477 | ||
475 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); | 478 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); |
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) | |||
483 | { | 486 | { |
484 | unsigned long syscall_flags = 0; | 487 | unsigned long syscall_flags = 0; |
485 | int ret; | 488 | int ret; |
486 | int pkey_rights = pkey_get(pkey, syscall_flags); | 489 | int pkey_rights = hw_pkey_get(pkey, syscall_flags); |
487 | u32 orig_pkru = rdpkru(); | 490 | u32 orig_pkru = rdpkru(); |
488 | 491 | ||
489 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); | 492 | pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); |
490 | 493 | ||
491 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 494 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
492 | pkey, pkey, pkey_rights); | 495 | pkey, pkey, pkey_rights); |
493 | pkey_assert(pkey_rights >= 0); | 496 | pkey_assert(pkey_rights >= 0); |
494 | 497 | ||
495 | pkey_rights |= flags; | 498 | pkey_rights |= flags; |
496 | 499 | ||
497 | ret = pkey_set(pkey, pkey_rights, 0); | 500 | ret = hw_pkey_set(pkey, pkey_rights, 0); |
498 | /* pkru and flags have the same format */ | 501 | /* pkru and flags have the same format */ |
499 | shadow_pkru &= ~(flags << (pkey * 2)); | 502 | shadow_pkru &= ~(flags << (pkey * 2)); |
500 | pkey_assert(ret >= 0); | 503 | pkey_assert(ret >= 0); |
501 | 504 | ||
502 | pkey_rights = pkey_get(pkey, syscall_flags); | 505 | pkey_rights = hw_pkey_get(pkey, syscall_flags); |
503 | dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, | 506 | dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, |
504 | pkey, pkey, pkey_rights); | 507 | pkey, pkey, pkey_rights); |
505 | 508 | ||
506 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); | 509 | dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); |
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, | |||
674 | struct pkey_malloc_record { | 677 | struct pkey_malloc_record { |
675 | void *ptr; | 678 | void *ptr; |
676 | long size; | 679 | long size; |
680 | int prot; | ||
677 | }; | 681 | }; |
678 | struct pkey_malloc_record *pkey_malloc_records; | 682 | struct pkey_malloc_record *pkey_malloc_records; |
683 | struct pkey_malloc_record *pkey_last_malloc_record; | ||
679 | long nr_pkey_malloc_records; | 684 | long nr_pkey_malloc_records; |
680 | void record_pkey_malloc(void *ptr, long size) | 685 | void record_pkey_malloc(void *ptr, long size, int prot) |
681 | { | 686 | { |
682 | long i; | 687 | long i; |
683 | struct pkey_malloc_record *rec = NULL; | 688 | struct pkey_malloc_record *rec = NULL; |
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) | |||
709 | (int)(rec - pkey_malloc_records), rec, ptr, size); | 714 | (int)(rec - pkey_malloc_records), rec, ptr, size); |
710 | rec->ptr = ptr; | 715 | rec->ptr = ptr; |
711 | rec->size = size; | 716 | rec->size = size; |
717 | rec->prot = prot; | ||
718 | pkey_last_malloc_record = rec; | ||
712 | nr_pkey_malloc_records++; | 719 | nr_pkey_malloc_records++; |
713 | } | 720 | } |
714 | 721 | ||
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) | |||
753 | pkey_assert(ptr != (void *)-1); | 760 | pkey_assert(ptr != (void *)-1); |
754 | ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); | 761 | ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); |
755 | pkey_assert(!ret); | 762 | pkey_assert(!ret); |
756 | record_pkey_malloc(ptr, size); | 763 | record_pkey_malloc(ptr, size, prot); |
757 | rdpkru(); | 764 | rdpkru(); |
758 | 765 | ||
759 | dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); | 766 | dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); |
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) | |||
774 | size = ALIGN_UP(size, HPAGE_SIZE * 2); | 781 | size = ALIGN_UP(size, HPAGE_SIZE * 2); |
775 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); | 782 | ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
776 | pkey_assert(ptr != (void *)-1); | 783 | pkey_assert(ptr != (void *)-1); |
777 | record_pkey_malloc(ptr, size); | 784 | record_pkey_malloc(ptr, size, prot); |
778 | mprotect_pkey(ptr, size, prot, pkey); | 785 | mprotect_pkey(ptr, size, prot, pkey); |
779 | 786 | ||
780 | dprintf1("unaligned ptr: %p\n", ptr); | 787 | dprintf1("unaligned ptr: %p\n", ptr); |
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) | |||
847 | pkey_assert(ptr != (void *)-1); | 854 | pkey_assert(ptr != (void *)-1); |
848 | mprotect_pkey(ptr, size, prot, pkey); | 855 | mprotect_pkey(ptr, size, prot, pkey); |
849 | 856 | ||
850 | record_pkey_malloc(ptr, size); | 857 | record_pkey_malloc(ptr, size, prot); |
851 | 858 | ||
852 | dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); | 859 | dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); |
853 | return ptr; | 860 | return ptr; |
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) | |||
869 | 876 | ||
870 | mprotect_pkey(ptr, size, prot, pkey); | 877 | mprotect_pkey(ptr, size, prot, pkey); |
871 | 878 | ||
872 | record_pkey_malloc(ptr, size); | 879 | record_pkey_malloc(ptr, size, prot); |
873 | 880 | ||
874 | dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); | 881 | dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); |
875 | close(fd); | 882 | close(fd); |
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) | |||
918 | } | 925 | } |
919 | 926 | ||
920 | int last_pkru_faults; | 927 | int last_pkru_faults; |
928 | #define UNKNOWN_PKEY -2 | ||
921 | void expected_pk_fault(int pkey) | 929 | void expected_pk_fault(int pkey) |
922 | { | 930 | { |
923 | dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", | 931 | dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", |
924 | __func__, last_pkru_faults, pkru_faults); | 932 | __func__, last_pkru_faults, pkru_faults); |
925 | dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); | 933 | dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); |
926 | pkey_assert(last_pkru_faults + 1 == pkru_faults); | 934 | pkey_assert(last_pkru_faults + 1 == pkru_faults); |
927 | pkey_assert(last_si_pkey == pkey); | 935 | |
936 | /* | ||
937 | * For exec-only memory, we do not know the pkey in | ||
938 | * advance, so skip this check. | ||
939 | */ | ||
940 | if (pkey != UNKNOWN_PKEY) | ||
941 | pkey_assert(last_si_pkey == pkey); | ||
942 | |||
928 | /* | 943 | /* |
929 | * The signal handler shold have cleared out PKRU to let the | 944 | * The signal handler shold have cleared out PKRU to let the |
930 | * test program continue. We now have to restore it. | 945 | * test program continue. We now have to restore it. |
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) | |||
939 | last_si_pkey = -1; | 954 | last_si_pkey = -1; |
940 | } | 955 | } |
941 | 956 | ||
942 | void do_not_expect_pk_fault(void) | 957 | #define do_not_expect_pk_fault(msg) do { \ |
943 | { | 958 | if (last_pkru_faults != pkru_faults) \ |
944 | pkey_assert(last_pkru_faults == pkru_faults); | 959 | dprintf0("unexpected PK fault: %s\n", msg); \ |
945 | } | 960 | pkey_assert(last_pkru_faults == pkru_faults); \ |
961 | } while (0) | ||
946 | 962 | ||
947 | int test_fds[10] = { -1 }; | 963 | int test_fds[10] = { -1 }; |
948 | int nr_test_fds; | 964 | int nr_test_fds; |
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) | |||
1151 | pkey_assert(i < NR_PKEYS*2); | 1167 | pkey_assert(i < NR_PKEYS*2); |
1152 | 1168 | ||
1153 | /* | 1169 | /* |
1154 | * There are 16 pkeys supported in hardware. One is taken | 1170 | * There are 16 pkeys supported in hardware. Three are |
1155 | * up for the default (0) and another can be taken up by | 1171 | * allocated by the time we get here: |
1156 | * an execute-only mapping. Ensure that we can allocate | 1172 | * 1. The default key (0) |
1157 | * at least 14 (16-2). | 1173 | * 2. One possibly consumed by an execute-only mapping. |
1174 | * 3. One allocated by the test code and passed in via | ||
1175 | * 'pkey' to this function. | ||
1176 | * Ensure that we can allocate at least another 13 (16-3). | ||
1158 | */ | 1177 | */ |
1159 | pkey_assert(i >= NR_PKEYS-2); | 1178 | pkey_assert(i >= NR_PKEYS-3); |
1160 | 1179 | ||
1161 | for (i = 0; i < nr_allocated_pkeys; i++) { | 1180 | for (i = 0; i < nr_allocated_pkeys; i++) { |
1162 | err = sys_pkey_free(allocated_pkeys[i]); | 1181 | err = sys_pkey_free(allocated_pkeys[i]); |
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) | |||
1165 | } | 1184 | } |
1166 | } | 1185 | } |
1167 | 1186 | ||
1187 | /* | ||
1188 | * pkey 0 is special. It is allocated by default, so you do not | ||
1189 | * have to call pkey_alloc() to use it first. Make sure that it | ||
1190 | * is usable. | ||
1191 | */ | ||
1192 | void test_mprotect_with_pkey_0(int *ptr, u16 pkey) | ||
1193 | { | ||
1194 | long size; | ||
1195 | int prot; | ||
1196 | |||
1197 | assert(pkey_last_malloc_record); | ||
1198 | size = pkey_last_malloc_record->size; | ||
1199 | /* | ||
1200 | * This is a bit of a hack. But mprotect() requires | ||
1201 | * huge-page-aligned sizes when operating on hugetlbfs. | ||
1202 | * So, make sure that we use something that's a multiple | ||
1203 | * of a huge page when we can. | ||
1204 | */ | ||
1205 | if (size >= HPAGE_SIZE) | ||
1206 | size = HPAGE_SIZE; | ||
1207 | prot = pkey_last_malloc_record->prot; | ||
1208 | |||
1209 | /* Use pkey 0 */ | ||
1210 | mprotect_pkey(ptr, size, prot, 0); | ||
1211 | |||
1212 | /* Make sure that we can set it back to the original pkey. */ | ||
1213 | mprotect_pkey(ptr, size, prot, pkey); | ||
1214 | } | ||
1215 | |||
1168 | void test_ptrace_of_child(int *ptr, u16 pkey) | 1216 | void test_ptrace_of_child(int *ptr, u16 pkey) |
1169 | { | 1217 | { |
1170 | __attribute__((__unused__)) int peek_result; | 1218 | __attribute__((__unused__)) int peek_result; |
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) | |||
1228 | pkey_assert(ret != -1); | 1276 | pkey_assert(ret != -1); |
1229 | /* Now access from the current task, and expect NO exception: */ | 1277 | /* Now access from the current task, and expect NO exception: */ |
1230 | peek_result = read_ptr(plain_ptr); | 1278 | peek_result = read_ptr(plain_ptr); |
1231 | do_not_expect_pk_fault(); | 1279 | do_not_expect_pk_fault("read plain pointer after ptrace"); |
1232 | 1280 | ||
1233 | ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); | 1281 | ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); |
1234 | pkey_assert(ret != -1); | 1282 | pkey_assert(ret != -1); |
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) | |||
1241 | free(plain_ptr_unaligned); | 1289 | free(plain_ptr_unaligned); |
1242 | } | 1290 | } |
1243 | 1291 | ||
1244 | void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | 1292 | void *get_pointer_to_instructions(void) |
1245 | { | 1293 | { |
1246 | void *p1; | 1294 | void *p1; |
1247 | int scratch; | ||
1248 | int ptr_contents; | ||
1249 | int ret; | ||
1250 | 1295 | ||
1251 | p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); | 1296 | p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); |
1252 | dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); | 1297 | dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); |
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | |||
1256 | /* Point 'p1' at the *second* page of the function: */ | 1301 | /* Point 'p1' at the *second* page of the function: */ |
1257 | p1 += PAGE_SIZE; | 1302 | p1 += PAGE_SIZE; |
1258 | 1303 | ||
1304 | /* | ||
1305 | * Try to ensure we fault this in on next touch to ensure | ||
1306 | * we get an instruction fault as opposed to a data one | ||
1307 | */ | ||
1259 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | 1308 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
1309 | |||
1310 | return p1; | ||
1311 | } | ||
1312 | |||
1313 | void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | ||
1314 | { | ||
1315 | void *p1; | ||
1316 | int scratch; | ||
1317 | int ptr_contents; | ||
1318 | int ret; | ||
1319 | |||
1320 | p1 = get_pointer_to_instructions(); | ||
1260 | lots_o_noops_around_write(&scratch); | 1321 | lots_o_noops_around_write(&scratch); |
1261 | ptr_contents = read_ptr(p1); | 1322 | ptr_contents = read_ptr(p1); |
1262 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | 1323 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); |
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) | |||
1272 | */ | 1333 | */ |
1273 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | 1334 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); |
1274 | lots_o_noops_around_write(&scratch); | 1335 | lots_o_noops_around_write(&scratch); |
1275 | do_not_expect_pk_fault(); | 1336 | do_not_expect_pk_fault("executing on PROT_EXEC memory"); |
1276 | ptr_contents = read_ptr(p1); | 1337 | ptr_contents = read_ptr(p1); |
1277 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | 1338 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); |
1278 | expected_pk_fault(pkey); | 1339 | expected_pk_fault(pkey); |
1279 | } | 1340 | } |
1280 | 1341 | ||
1342 | void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) | ||
1343 | { | ||
1344 | void *p1; | ||
1345 | int scratch; | ||
1346 | int ptr_contents; | ||
1347 | int ret; | ||
1348 | |||
1349 | dprintf1("%s() start\n", __func__); | ||
1350 | |||
1351 | p1 = get_pointer_to_instructions(); | ||
1352 | lots_o_noops_around_write(&scratch); | ||
1353 | ptr_contents = read_ptr(p1); | ||
1354 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | ||
1355 | |||
1356 | /* Use a *normal* mprotect(), not mprotect_pkey(): */ | ||
1357 | ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); | ||
1358 | pkey_assert(!ret); | ||
1359 | |||
1360 | dprintf2("pkru: %x\n", rdpkru()); | ||
1361 | |||
1362 | /* Make sure this is an *instruction* fault */ | ||
1363 | madvise(p1, PAGE_SIZE, MADV_DONTNEED); | ||
1364 | lots_o_noops_around_write(&scratch); | ||
1365 | do_not_expect_pk_fault("executing on PROT_EXEC memory"); | ||
1366 | ptr_contents = read_ptr(p1); | ||
1367 | dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); | ||
1368 | expected_pk_fault(UNKNOWN_PKEY); | ||
1369 | |||
1370 | /* | ||
1371 | * Put the memory back to non-PROT_EXEC. Should clear the | ||
1372 | * exec-only pkey off the VMA and allow it to be readable | ||
1373 | * again. Go to PROT_NONE first to check for a kernel bug | ||
1374 | * that did not clear the pkey when doing PROT_NONE. | ||
1375 | */ | ||
1376 | ret = mprotect(p1, PAGE_SIZE, PROT_NONE); | ||
1377 | pkey_assert(!ret); | ||
1378 | |||
1379 | ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); | ||
1380 | pkey_assert(!ret); | ||
1381 | ptr_contents = read_ptr(p1); | ||
1382 | do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); | ||
1383 | } | ||
1384 | |||
1281 | void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) | 1385 | void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) |
1282 | { | 1386 | { |
1283 | int size = PAGE_SIZE; | 1387 | int size = PAGE_SIZE; |
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { | |||
1302 | test_kernel_gup_of_access_disabled_region, | 1406 | test_kernel_gup_of_access_disabled_region, |
1303 | test_kernel_gup_write_to_write_disabled_region, | 1407 | test_kernel_gup_write_to_write_disabled_region, |
1304 | test_executing_on_unreadable_memory, | 1408 | test_executing_on_unreadable_memory, |
1409 | test_implicit_mprotect_exec_only_memory, | ||
1410 | test_mprotect_with_pkey_0, | ||
1305 | test_ptrace_of_child, | 1411 | test_ptrace_of_child, |
1306 | test_pkey_syscalls_on_non_allocated_pkey, | 1412 | test_pkey_syscalls_on_non_allocated_pkey, |
1307 | test_pkey_syscalls_bad_args, | 1413 | test_pkey_syscalls_bad_args, |