-rw-r--r--  arch/x86/Kconfig                                 |    5
-rw-r--r--  arch/x86/boot/bitops.h                           |    8
-rw-r--r--  arch/x86/boot/boot.h                             |   18
-rw-r--r--  arch/x86/boot/cpu.c                              |    2
-rw-r--r--  arch/x86/boot/cpucheck.c                         |   33
-rw-r--r--  arch/x86/boot/cpuflags.c                         |    1
-rw-r--r--  arch/x86/boot/cpuflags.h                         |    1
-rw-r--r--  arch/x86/boot/string.c                           |    2
-rw-r--r--  arch/x86/entry/common.c                          |    6
-rw-r--r--  arch/x86/entry/entry_32.S                        |   11
-rw-r--r--  arch/x86/entry/entry_64.S                        |   11
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl           |    4
-rw-r--r--  arch/x86/entry/thunk_64.S                        |    6
-rw-r--r--  arch/x86/entry/vdso/Makefile                     |    5
-rw-r--r--  arch/x86/entry/vdso/vdso32/sigreturn.S           |    8
-rw-r--r--  arch/x86/entry/vdso/vdso32/system_call.S         |    7
-rw-r--r--  arch/x86/entry/vdso/vma.c                        |   47
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c            |   10
-rw-r--r--  arch/x86/include/asm/apm.h                       |    6
-rw-r--r--  arch/x86/include/asm/arch_hweight.h              |   24
-rw-r--r--  arch/x86/include/asm/archrandom.h                |  132
-rw-r--r--  arch/x86/include/asm/asm.h                       |   12
-rw-r--r--  arch/x86/include/asm/atomic.h                    |   16
-rw-r--r--  arch/x86/include/asm/atomic64_64.h               |   18
-rw-r--r--  arch/x86/include/asm/bitops.h                    |   50
-rw-r--r--  arch/x86/include/asm/checksum_32.h               |    3
-rw-r--r--  arch/x86/include/asm/compat.h                    |   11
-rw-r--r--  arch/x86/include/asm/cpu.h                       |    1
-rw-r--r--  arch/x86/include/asm/efi.h                       |    1
-rw-r--r--  arch/x86/include/asm/local.h                     |   16
-rw-r--r--  arch/x86/include/asm/percpu.h                    |   17
-rw-r--r--  arch/x86/include/asm/pgtable.h                   |   13
-rw-r--r--  arch/x86/include/asm/pgtable_64.h                |   26
-rw-r--r--  arch/x86/include/asm/pgtable_types.h             |    8
-rw-r--r--  arch/x86/include/asm/preempt.h                   |    2
-rw-r--r--  arch/x86/include/asm/processor.h                 |   20
-rw-r--r--  arch/x86/include/asm/rmwcc.h                     |   20
-rw-r--r--  arch/x86/include/asm/rwsem.h                     |   17
-rw-r--r--  arch/x86/include/asm/signal.h                    |    6
-rw-r--r--  arch/x86/include/asm/smp.h                       |    6
-rw-r--r--  arch/x86/include/asm/sync_bitops.h               |   18
-rw-r--r--  arch/x86/include/asm/thread_info.h               |    9
-rw-r--r--  arch/x86/include/asm/uaccess.h                   |   33
-rw-r--r--  arch/x86/include/asm/unistd.h                    |    2
-rw-r--r--  arch/x86/kernel/asm-offsets.c                    |    4
-rw-r--r--  arch/x86/kernel/cpu/common.c                     |    2
-rw-r--r--  arch/x86/kernel/cpu/rdrand.c                     |    4
-rw-r--r--  arch/x86/kernel/dumpstack.c                      |   20
-rw-r--r--  arch/x86/kernel/dumpstack_64.c                   |   12
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c                  |    2
-rw-r--r--  arch/x86/kernel/signal_compat.c                  |  108
-rw-r--r--  arch/x86/kernel/smpboot.c                        |    1
-rw-r--r--  arch/x86/kernel/vm86_32.c                        |    5
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c                 |    3
-rw-r--r--  arch/x86/lib/Makefile                            |    2
-rw-r--r--  arch/x86/lib/copy_user_64.S                      |    8
-rw-r--r--  arch/x86/lib/csum-wrappers_64.c                  |    1
-rw-r--r--  arch/x86/lib/getuser.S                           |   20
-rw-r--r--  arch/x86/lib/hweight.S                           |   77
-rw-r--r--  arch/x86/lib/putuser.S                           |   10
-rw-r--r--  arch/x86/lib/usercopy_64.c                       |    2
-rw-r--r--  arch/x86/mm/extable.c                            |    2
-rw-r--r--  arch/x86/mm/fault.c                              |    4
-rw-r--r--  arch/x86/mm/init_64.c                            |   37
-rw-r--r--  arch/x86/mm/pageattr.c                           |   37
-rw-r--r--  arch/x86/mm/pat.c                                |    5
-rw-r--r--  arch/x86/mm/pgtable_32.c                         |    2
-rw-r--r--  arch/x86/platform/efi/efi.c                      |    2
-rw-r--r--  arch/x86/platform/efi/efi_32.c                   |    3
-rw-r--r--  arch/x86/platform/efi/efi_64.c                   |    5
-rw-r--r--  arch/x86/xen/enlighten.c                         |    4
-rw-r--r--  drivers/char/mem.c                               |    6
-rw-r--r--  drivers/pnp/isapnp/proc.c                        |    2
-rw-r--r--  fs/read_write.c                                  |   18
-rw-r--r--  include/linux/context_tracking.h                 |   15
-rw-r--r--  include/linux/mm_types.h                         |    3
-rw-r--r--  include/linux/random.h                           |   12
-rw-r--r--  lib/Makefile                                     |    5
-rw-r--r--  lib/bitmap.c                                     |    2
-rw-r--r--  lib/hweight.c                                    |    4
-rw-r--r--  mm/mmap.c                                        |   10
-rw-r--r--  tools/testing/selftests/x86/Makefile             |    4
-rw-r--r--  tools/testing/selftests/x86/mpx-debug.h          |   14
-rw-r--r--  tools/testing/selftests/x86/mpx-dig.c            |  498
-rw-r--r--  tools/testing/selftests/x86/mpx-hw.h             |  123
-rw-r--r--  tools/testing/selftests/x86/mpx-mini-test.c      | 1585
-rw-r--r--  tools/testing/selftests/x86/mpx-mm.h             |    9
-rw-r--r--  tools/testing/selftests/x86/test_mremap_vdso.c   |  111
 88 files changed, 3069 insertions(+), 406 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d9a94da0c29f..df884a522c39 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
 	def_bool y
 	depends on X86_32 && !CC_STACKPROTECTOR
 
-config ARCH_HWEIGHT_CFLAGS
-	string
-	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
-	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 878e4b9940d9..0d41d68131cc 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -16,14 +16,16 @@
 #define BOOT_BITOPS_H
 #define _LINUX_BITOPS_H		/* Inhibit inclusion of <linux/bitops.h> */
 
-static inline int constant_test_bit(int nr, const void *addr)
+#include <linux/types.h>
+
+static inline bool constant_test_bit(int nr, const void *addr)
 {
 	const u32 *p = (const u32 *)addr;
 	return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
 }
-static inline int variable_test_bit(int nr, const void *addr)
+static inline bool variable_test_bit(int nr, const void *addr)
 {
-	u8 v;
+	bool v;
 	const u32 *p = (const u32 *)addr;
 
 	asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 9011a88353de..e5612f3e3b57 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/edd.h>
 #include <asm/setup.h>
+#include <asm/asm.h>
 #include "bitops.h"
 #include "ctype.h"
 #include "cpuflags.h"
@@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr)
 }
 
 /* Note: these only return true/false, not a signed return value! */
-static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
 {
-	u8 diff;
-	asm volatile("fs; repe; cmpsb; setnz %0"
-		     : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+	bool diff;
+	asm volatile("fs; repe; cmpsb" CC_SET(nz)
+		     : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
 	return diff;
 }
-static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
 {
-	u8 diff;
-	asm volatile("gs; repe; cmpsb; setnz %0"
-		     : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+	bool diff;
+	asm volatile("gs; repe; cmpsb" CC_SET(nz)
+		     : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
 	return diff;
 }
 
@@ -294,6 +295,7 @@ static inline int cmdline_find_option_bool(const char *option)
 
 /* cpu.c, cpucheck.c */
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
+int check_knl_erratum(void);
 int validate_cpu(void);
 
 /* early_serial_console.c */
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 29207f69ae8c..26240dde081e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -93,6 +93,8 @@ int validate_cpu(void)
 		show_cap_strs(err_flags);
 		putchar('\n');
 		return -1;
+	} else if (check_knl_erratum()) {
+		return -1;
 	} else {
 		return 0;
 	}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 1fd7d575092e..4ad7d70e8739 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -24,6 +24,7 @@
 # include "boot.h"
 #endif
 #include <linux/types.h>
+#include <asm/intel-family.h>
 #include <asm/processor-flags.h>
 #include <asm/required-features.h>
 #include <asm/msr-index.h>
@@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 			puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
 		}
 	}
+	if (!err)
+		err = check_knl_erratum();
 
 	if (err_flags_ptr)
 		*err_flags_ptr = err ? err_flags : NULL;
@@ -185,3 +188,33 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 
 	return (cpu.level < req_level || err) ? -1 : 0;
 }
+
+int check_knl_erratum(void)
+{
+	/*
+	 * First check for the affected model/family:
+	 */
+	if (!is_intel() ||
+	    cpu.family != 6 ||
+	    cpu.model != INTEL_FAM6_XEON_PHI_KNL)
+		return 0;
+
+	/*
+	 * This erratum affects the Accessed/Dirty bits, and can
+	 * cause stray bits to be set in !Present PTEs.  We have
+	 * enough bits in our 64-bit PTEs (which we have on real
+	 * 64-bit mode or PAE) to avoid using these troublesome
+	 * bits.  But, we do not have enough space in our 32-bit
+	 * PTEs.  So, refuse to run on 32-bit non-PAE kernels.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE))
+		return 0;
+
+	puts("This 32-bit kernel can not run on this Xeon Phi x200\n"
+	     "processor due to a processor erratum.  Use a 64-bit\n"
+	     "kernel, or enable PAE in this 32-bit kernel.\n\n");
+
+	return -1;
+}
+
+
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index 431fa5f84537..6687ab953257 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -102,6 +102,7 @@ void get_cpuflags(void)
 		cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
 		      &cpu.flags[0]);
 		cpu.level = (tfms >> 8) & 15;
+		cpu.family = cpu.level;
 		cpu.model = (tfms >> 4) & 15;
 		if (cpu.level >= 6)
 			cpu.model += ((tfms >> 16) & 0xf) << 4;
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index 4cb404fd45ce..15ad56a3f905 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -6,6 +6,7 @@
 
 struct cpu_features {
 	int level;		/* Family, or 64 for x86-64 */
+	int family;		/* Family, always */
 	int model;
 	u32 flags[NCAPINTS];
 };
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 318b8465d302..cc3bd583dce1 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -17,7 +17,7 @@
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
-	u8 diff;
+	bool diff;
 	asm("repe; cmpsb; setnz %0"
 	    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
 	return diff;
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e538c44..9e1e27d31c6d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,10 +40,10 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
-	user_exit();
+	user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	ti->status &= ~TS_COMPAT;
 #endif
 
-	user_enter();
+	user_enter_irqoff();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS				\
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 983e5d3a0d27..0b56666e6039 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1153,3 +1153,14 @@ ENTRY(async_page_fault)
 	jmp	error_code
 END(async_page_fault)
 #endif
+
+ENTRY(rewind_stack_do_exit)
+	/* Prevent any naive code from trying to unwind to our caller. */
+	xorl	%ebp, %ebp
+
+	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
+	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+
+	call	do_exit
+1:	jmp 1b
+END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1807ed..b846875aeea6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret)
 	mov	$-ENOSYS, %eax
 	sysret
 END(ignore_sysret)
+
+ENTRY(rewind_stack_do_exit)
+	/* Prevent any naive code from trying to unwind to our caller. */
+	xorl	%ebp, %ebp
+
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
+	leaq	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+
+	call	do_exit
+1:	jmp 1b
+END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 555263e385c9..e9ce9c7c39b4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,5 +374,5 @@
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		compat_sys_execveat/ptregs
-534	x32	preadv2			compat_sys_preadv2
-535	x32	pwritev2		compat_sys_pwritev2
+546	x32	preadv2			compat_sys_preadv64v2
+547	x32	pwritev2		compat_sys_pwritev64v2
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 027aec4a74df..627ecbcb2e62 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -33,7 +33,7 @@
 	.endif
 
 	call \func
-	jmp  restore
+	jmp  .L_restore
 	_ASM_NOKPROBE(\name)
 	.endm
 
@@ -54,7 +54,7 @@
 #if defined(CONFIG_TRACE_IRQFLAGS) \
  || defined(CONFIG_DEBUG_LOCK_ALLOC) \
  || defined(CONFIG_PREEMPT)
-restore:
+.L_restore:
 	popq %r11
 	popq %r10
 	popq %r9
@@ -66,5 +66,5 @@ restore:
 	popq %rdi
 	popq %rbp
 	ret
-	_ASM_NOKPROBE(restore)
+	_ASM_NOKPROBE(.L_restore)
 #endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 253b72eaade6..68b63fddc209 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
+targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 targets += vdso32/vclock_gettime.o
 
 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
@@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \
 		      $(obj)/vdso32/vdso32.lds \
 		      $(obj)/vdso32/vclock_gettime.o \
 		      $(obj)/vdso32/note.o \
-		      $(obj)/vdso32/system_call.o
+		      $(obj)/vdso32/system_call.o \
+		      $(obj)/vdso32/sigreturn.o
 	$(call if_changed,vdso)
 
 #
diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
index d7ec4e251c0a..20633e026e82 100644
--- a/arch/x86/entry/vdso/vdso32/sigreturn.S
+++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
@@ -1,11 +1,3 @@
-/*
- * Common code for the sigreturn entry points in vDSO images.
- * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by int80.S et al to define them first thing.
- * The kernel assumes that the addresses of these routines are constant
- * for all vDSO implementations.
- */
-
 #include <linux/linkage.h>
 #include <asm/unistd_32.h>
 #include <asm/asm-offsets.h>
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 0109ac6cb79c..ed4bc9731cbb 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -2,16 +2,11 @@
  * AT_SYSINFO entry point
 */
 
+#include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
-/*
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
 	.text
 	.globl __kernel_vsyscall
 	.type __kernel_vsyscall,@function
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index ab220ac9b3b9..3329844e3c43 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static void vdso_fix_landing(const struct vdso_image *image,
+		struct vm_area_struct *new_vma)
+{
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+#endif
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
+		return -EFAULT;
+
+	vdso_fix_landing(image, new_vma);
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
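
[Example] The new .mremap hook is what lets userspace (checkpoint/restore tools, for instance) relocate the vDSO; the diffstat's tools/testing/selftests/x86/test_mremap_vdso.c exercises it. A minimal standalone sketch of the idea in C (illustrative, not copied from the selftest):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            unsigned long start = 0, end = 0;
            char line[256];
            FILE *maps = fopen("/proc/self/maps", "r");

            if (!maps)
                    return 1;
            /* locate the [vdso] mapping to learn its address and size */
            while (fgets(line, sizeof(line), maps))
                    if (strstr(line, "[vdso]")) {
                            sscanf(line, "%lx-%lx", &start, &end);
                            break;
                    }
            fclose(maps);
            if (!start)
                    return 1;

            size_t size = end - start;
            void *dest = mmap(NULL, size, PROT_NONE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            void *new = mremap((void *)start, size, size,
                               MREMAP_FIXED | MREMAP_MAYMOVE, dest);
            if (new == MAP_FAILED) {
                    perror("mremap");
                    return 1;
            }
            /* with the hook in place, context.vdso now tracks the move,
             * so signal delivery and vDSO calls keep working: */
            printf("vdso moved to %p; getpid() = %d\n", new, (int)getpid());
            return 0;
    }
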
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 174c2549939d..75fc719b7f31 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
 {
 	/*
 	 * XXX: if access_ok, get_user, and put_user handled
-	 * sig_on_uaccess_error, this could go away.
+	 * sig_on_uaccess_err, this could go away.
 	 */
 
 	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
@@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	struct task_struct *tsk;
 	unsigned long caller;
 	int vsyscall_nr, syscall_nr, tmp;
-	int prev_sig_on_uaccess_error;
+	int prev_sig_on_uaccess_err;
 	long ret;
 
 	/*
@@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
 	 * preserve that behavior to make writing exploits harder.
 	 */
-	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
-	current_thread_info()->sig_on_uaccess_error = 1;
+	prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
+	current->thread.sig_on_uaccess_err = 1;
 
 	ret = -EFAULT;
 	switch (vsyscall_nr) {
@@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 		break;
 	}
 
-	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+	current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
 
 check_fault:
 	if (ret == -EFAULT) {
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 20370c6db74b..93eebc636c76 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
45 : "memory", "cc"); 45 : "memory", "cc");
46} 46}
47 47
48static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, 48static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
49 u32 ecx_in, u32 *eax) 49 u32 ecx_in, u32 *eax)
50{ 50{
51 int cx, dx, si; 51 int cx, dx, si;
52 u8 error; 52 bool error;
53 53
54 /* 54 /*
55 * N.B. We do NOT need a cld after the BIOS call 55 * N.B. We do NOT need a cld after the BIOS call
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799fa43d1..e7cd63175de4 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -4,8 +4,8 @@
 #include <asm/cpufeatures.h>
 
 #ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %edi, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
 /* popcnt %rdi, %rax */
 #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
 #define REG_IN "D"
@@ -17,19 +17,15 @@
 #define REG_OUT "a"
 #endif
 
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
+#define __HAVE_ARCH_SW_HWEIGHT
+
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
-	unsigned int res = 0;
+	unsigned int res;
 
 	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
 		 : "="REG_OUT (res)
 		 : REG_IN (w));
 
 	return res;
 }
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
 #else
 static __always_inline unsigned long __arch_hweight64(__u64 w)
 {
-	unsigned long res = 0;
+	unsigned long res;
 
 	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
 		 : "="REG_OUT (res)
 		 : REG_IN (w));
 
 	return res;
 }
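
[Example] With ARCH_HWEIGHT_CFLAGS gone, the __sw_hweight* fallbacks (now assembly in arch/x86/lib/hweight.S, per the diffstat) save and restore clobbered registers themselves instead of relying on special compiler flags. The fallback computes the same result as POPCNT; a sketch of the classic SWAR reduction that the generic lib/hweight.c uses:

    /* Population count without POPCNT: classic SWAR reduction. */
    static unsigned int sw_hweight32(unsigned int w)
    {
            w -= (w >> 1) & 0x55555555;                      /* 2-bit sums  */
            w  = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* 4-bit sums  */
            w  = (w + (w >> 4)) & 0x0f0f0f0f;                /* 8-bit sums  */
            return (w * 0x01010101) >> 24;                   /* total bytes */
    }
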
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 69f1366f1aa3..5b0579abb398 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -25,8 +25,6 @@
 
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
-#include <asm/alternative.h>
-#include <asm/nops.h>
 
 #define RDRAND_RETRY_LOOPS	10
 
@@ -40,97 +38,91 @@
40# define RDSEED_LONG RDSEED_INT 38# define RDSEED_LONG RDSEED_INT
41#endif 39#endif
42 40
43#ifdef CONFIG_ARCH_RANDOM 41/* Unconditional execution of RDRAND and RDSEED */
44 42
45/* Instead of arch_get_random_long() when alternatives haven't run. */ 43static inline bool rdrand_long(unsigned long *v)
46static inline int rdrand_long(unsigned long *v)
47{ 44{
48 int ok; 45 bool ok;
49 asm volatile("1: " RDRAND_LONG "\n\t" 46 unsigned int retry = RDRAND_RETRY_LOOPS;
50 "jc 2f\n\t" 47 do {
51 "decl %0\n\t" 48 asm volatile(RDRAND_LONG "\n\t"
52 "jnz 1b\n\t" 49 CC_SET(c)
53 "2:" 50 : CC_OUT(c) (ok), "=a" (*v));
54 : "=r" (ok), "=a" (*v) 51 if (ok)
55 : "0" (RDRAND_RETRY_LOOPS)); 52 return true;
56 return ok; 53 } while (--retry);
54 return false;
55}
56
57static inline bool rdrand_int(unsigned int *v)
58{
59 bool ok;
60 unsigned int retry = RDRAND_RETRY_LOOPS;
61 do {
62 asm volatile(RDRAND_INT "\n\t"
63 CC_SET(c)
64 : CC_OUT(c) (ok), "=a" (*v));
65 if (ok)
66 return true;
67 } while (--retry);
68 return false;
57} 69}
58 70
59/* A single attempt at RDSEED */
60static inline bool rdseed_long(unsigned long *v) 71static inline bool rdseed_long(unsigned long *v)
61{ 72{
62 unsigned char ok; 73 bool ok;
63 asm volatile(RDSEED_LONG "\n\t" 74 asm volatile(RDSEED_LONG "\n\t"
64 "setc %0" 75 CC_SET(c)
65 : "=qm" (ok), "=a" (*v)); 76 : CC_OUT(c) (ok), "=a" (*v));
66 return ok; 77 return ok;
67} 78}
68 79
69#define GET_RANDOM(name, type, rdrand, nop) \ 80static inline bool rdseed_int(unsigned int *v)
70static inline int name(type *v) \ 81{
71{ \ 82 bool ok;
72 int ok; \ 83 asm volatile(RDSEED_INT "\n\t"
73 alternative_io("movl $0, %0\n\t" \ 84 CC_SET(c)
74 nop, \ 85 : CC_OUT(c) (ok), "=a" (*v));
75 "\n1: " rdrand "\n\t" \ 86 return ok;
76 "jc 2f\n\t" \
77 "decl %0\n\t" \
78 "jnz 1b\n\t" \
79 "2:", \
80 X86_FEATURE_RDRAND, \
81 ASM_OUTPUT2("=r" (ok), "=a" (*v)), \
82 "0" (RDRAND_RETRY_LOOPS)); \
83 return ok; \
84}
85
86#define GET_SEED(name, type, rdseed, nop) \
87static inline int name(type *v) \
88{ \
89 unsigned char ok; \
90 alternative_io("movb $0, %0\n\t" \
91 nop, \
92 rdseed "\n\t" \
93 "setc %0", \
94 X86_FEATURE_RDSEED, \
95 ASM_OUTPUT2("=q" (ok), "=a" (*v))); \
96 return ok; \
97} 87}
98 88
99#ifdef CONFIG_X86_64 89/* Conditional execution based on CPU type */
100
101GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
102GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
103
104GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5);
105GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
106
107#else
108
109GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
110GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
111
112GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4);
113GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
114
115#endif /* CONFIG_X86_64 */
116
117#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) 90#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
118#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) 91#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
119 92
120#else 93/*
94 * These are the generic interfaces; they must not be declared if the
95 * stubs in <linux/random.h> are to be invoked,
96 * i.e. CONFIG_ARCH_RANDOM is not defined.
97 */
98#ifdef CONFIG_ARCH_RANDOM
121 99
122static inline int rdrand_long(unsigned long *v) 100static inline bool arch_get_random_long(unsigned long *v)
123{ 101{
124 return 0; 102 return arch_has_random() ? rdrand_long(v) : false;
125} 103}
126 104
127static inline bool rdseed_long(unsigned long *v) 105static inline bool arch_get_random_int(unsigned int *v)
128{ 106{
129 return 0; 107 return arch_has_random() ? rdrand_int(v) : false;
130} 108}
131 109
132#endif /* CONFIG_ARCH_RANDOM */ 110static inline bool arch_get_random_seed_long(unsigned long *v)
111{
112 return arch_has_random_seed() ? rdseed_long(v) : false;
113}
114
115static inline bool arch_get_random_seed_int(unsigned int *v)
116{
117 return arch_has_random_seed() ? rdseed_int(v) : false;
118}
133 119
134extern void x86_init_rdrand(struct cpuinfo_x86 *c); 120extern void x86_init_rdrand(struct cpuinfo_x86 *c);
135 121
122#else /* !CONFIG_ARCH_RANDOM */
123
124static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { }
125
126#endif /* !CONFIG_ARCH_RANDOM */
127
136#endif /* ASM_X86_ARCHRANDOM_H */ 128#endif /* ASM_X86_ARCHRANDOM_H */
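
[Example] The retry loop above follows Intel's guidance of bounding RDRAND retries (10 here). A userspace C sketch of the same protocol, using the mnemonic instead of the header's byte-coded opcode (the header needs "=a" because its hand-encoded form targets (r)ax; any register works with the mnemonic). Assumes a CPU with RDRAND:

    #include <stdbool.h>
    #include <stdio.h>

    #define RDRAND_RETRY_LOOPS 10

    static bool my_rdrand_long(unsigned long *v)
    {
            bool ok;
            unsigned int retry = RDRAND_RETRY_LOOPS;

            do {
                    /* RDRAND sets CF on success, clears it on underflow */
                    asm volatile("rdrand %1\n\tsetc %0"
                                 : "=qm" (ok), "=r" (*v));
                    if (ok)
                            return true;
            } while (--retry);
            return false;
    }

    int main(void)
    {
            unsigned long v;

            if (my_rdrand_long(&v))
                    printf("rdrand: %#lx\n", v);
            return 0;
    }
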
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index f5063b6659eb..7acb51c49fec 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -42,6 +42,18 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+/*
+ * Macros to generate condition code outputs from inline assembly,
+ * The output operand must be type "bool".
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
+# define CC_OUT(c) "=@cc" #c
+#else
+# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
+# define CC_OUT(c) [_cc_ ## c] "=qm"
+#endif
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
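
[Example] For reference, the two macro variants are always used as a pair: CC_SET() goes at the end of the asm template, CC_OUT() wraps the bool output operand. A self-contained userspace sketch of the pattern (the macro bodies are copied from this hunk; the test_and_set() wrapper and main() are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #ifdef __GCC_ASM_FLAG_OUTPUTS__
    # define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
    # define CC_OUT(c) "=@cc" #c
    #else
    # define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
    # define CC_OUT(c) [_cc_ ## c] "=qm"
    #endif

    /* bit-test-and-set; the old bit comes back in the carry flag */
    static inline bool test_and_set(unsigned long *word, int nr)
    {
            bool oldbit;

            asm("bts %2,%1"
                CC_SET(c)
                : CC_OUT(c) (oldbit), "+m" (*word)
                : "Ir" (nr));
            return oldbit;
    }

    int main(void)
    {
            unsigned long w = 0;

            printf("%d\n", test_and_set(&w, 3));  /* 0: bit was clear */
            printf("%d\n", test_and_set(&w, 3));  /* 1: bit was set   */
            return 0;
    }

With GCC 6+ the flag output ("=@ccc") lets the compiler consume the carry flag directly and drop the setcc/movzx sequence; older compilers fall back to an explicit setc into the bool.
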
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index a58b99811105..14635c5ea025 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -75,9 +75,9 @@ static __always_inline void atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
 }
 
 /**
@@ -112,9 +112,9 @@ static __always_inline void atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static __always_inline int atomic_dec_and_test(atomic_t *v)
+static __always_inline bool atomic_dec_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
 }
 
 /**
@@ -125,9 +125,9 @@ static __always_inline int atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static __always_inline int atomic_inc_and_test(atomic_t *v)
+static __always_inline bool atomic_inc_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
 }
 
 /**
@@ -139,9 +139,9 @@ static __always_inline int atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static __always_inline int atomic_add_negative(int i, atomic_t *v)
+static __always_inline bool atomic_add_negative(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
 }
 
 /**
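
[Example] The switch to bool matches how these helpers are consumed; the canonical caller is the "last put frees the object" pattern (kernel-style sketch in the spirit of kref_put(), not part of this patch):

    #include <linux/atomic.h>
    #include <linux/slab.h>

    struct object {
            atomic_t refcount;
            /* ... payload ... */
    };

    static void object_put(struct object *obj)
    {
            /* true only for the thread dropping the last reference */
            if (atomic_dec_and_test(&obj->refcount))
                    kfree(obj);
    }
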
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 70eed0e14553..89ed2f6ae2f7 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -70,9 +70,9 @@ static inline void atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
 }
 
 /**
@@ -109,9 +109,9 @@ static __always_inline void atomic64_dec(atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int atomic64_dec_and_test(atomic64_t *v)
+static inline bool atomic64_dec_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
 }
 
 /**
@@ -122,9 +122,9 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic64_inc_and_test(atomic64_t *v)
+static inline bool atomic64_inc_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
 }
 
 /**
@@ -136,9 +136,9 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int atomic64_add_negative(long i, atomic64_t *v)
+static inline bool atomic64_add_negative(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
 }
 
 /**
@@ -190,7 +190,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
 	long c, old;
 	c = atomic64_read(v);
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 7766d1cf096e..68557f52b961 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -201,9 +201,9 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add
  *
  * This is the same as test_and_set_bit on x86.
  */
-static __always_inline int
+static __always_inline bool
 test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 {
 	return test_and_set_bit(nr, addr);
@@ -228,13 +228,13 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	bool oldbit;
 
 	asm("bts %2,%1\n\t"
-	    "sbb %0,%0"
-	    : "=r" (oldbit), ADDR
+	    CC_SET(c)
+	    : CC_OUT(c) (oldbit), ADDR
 	    : "Ir" (nr));
 	return oldbit;
 }
@@ -247,9 +247,9 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -268,25 +268,25 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a
  * accessed from a hypervisor on the same CPU if running in a VM: don't change
  * this without also updating arch/x86/kernel/kvm.c
  */
-static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	bool oldbit;
 
 	asm volatile("btr %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR
+		     CC_SET(c)
+		     : CC_OUT(c) (oldbit), ADDR
 		     : "Ir" (nr));
 	return oldbit;
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	bool oldbit;
 
 	asm volatile("btc %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR
+		     CC_SET(c)
+		     : CC_OUT(c) (oldbit), ADDR
 		     : "Ir" (nr) : "memory");
 
 	return oldbit;
@@ -300,24 +300,24 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
 }
 
-static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
+static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
 {
 	return ((1UL << (nr & (BITS_PER_LONG-1))) &
 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
 {
-	int oldbit;
+	bool oldbit;
 
 	asm volatile("bt %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit)
+		     CC_SET(c)
+		     : CC_OUT(c) (oldbit)
 		     : "m" (*(unsigned long *)addr), "Ir" (nr));
 
 	return oldbit;
@@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static int test_bit(int nr, const volatile unsigned long *addr);
+static bool test_bit(int nr, const volatile unsigned long *addr);
 #endif
 
 #define test_bit(nr, addr)			\
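
[Example] All of these helpers return the previous bit value, now typed bool; a typical run-once guard built on test_and_set_bit() (kernel-style sketch, not from this patch; the bit number is an assumption for illustration):

    #include <linux/bitops.h>

    static unsigned long init_flags;
    #define INIT_DONE	0	/* bit number, illustrative */

    static void do_init_once(void)
    {
            /* old bit already set: another CPU won the race */
            if (test_and_set_bit(INIT_DONE, &init_flags))
                    return;
            /* ... one-time initialization runs exactly once ... */
    }
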
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 532f85e6651f..7b53743ed267 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -2,8 +2,7 @@
 #define _ASM_X86_CHECKSUM_32_H
 
 #include <linux/in6.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * computes the checksum of a memory block at buff, length len,
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 5a3b2c119ed0..a18806165fe4 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -40,6 +40,7 @@ typedef s32 compat_long_t;
 typedef s64 __attribute__((aligned(4))) compat_s64;
 typedef u32 compat_uint_t;
 typedef u32 compat_ulong_t;
+typedef u32 compat_u32;
 typedef u64 __attribute__((aligned(4))) compat_u64;
 typedef u32 compat_uptr_t;
 
@@ -181,6 +182,16 @@ typedef struct compat_siginfo {
 	/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
 	struct {
 		unsigned int _addr;	/* faulting insn/memory ref. */
+		short int _addr_lsb;	/* Valid LSB of the reported address. */
+		union {
+			/* used when si_code=SEGV_BNDERR */
+			struct {
+				compat_uptr_t _lower;
+				compat_uptr_t _upper;
+			} _addr_bnd;
+			/* used when si_code=SEGV_PKUERR */
+			compat_u32 _pkey;
+		};
 	} _sigfault;
 
 	/* SIGPOLL */
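
[Example] The union keeps _pkey overlapping _addr_bnd just as in the native siginfo, so 32-bit copies line up field-for-field; the new arch/x86/kernel/signal_compat.c (108 lines in the diffstat) exists to verify such layouts at build time. A compile-only sketch of that kind of check (standalone C11; names and the struct here are illustrative, not the kernel's):

    #include <stddef.h>

    typedef unsigned int compat_uptr_t;
    typedef unsigned int compat_u32;

    struct compat_sigfault {
            unsigned int _addr;
            short int _addr_lsb;
            union {
                    struct {
                            compat_uptr_t _lower;
                            compat_uptr_t _upper;
                    } _addr_bnd;
                    compat_u32 _pkey;
            };
    };

    /* _pkey must alias the start of _addr_bnd, as the union implies */
    _Static_assert(offsetof(struct compat_sigfault, _pkey) ==
                   offsetof(struct compat_sigfault, _addr_bnd._lower),
                   "pkey must alias the start of _addr_bnd");
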
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 678637ad7476..59d34c521d96 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -17,7 +17,6 @@ static inline void prefill_possible_map(void) {}
 
 #define cpu_physical_id(cpu)			boot_cpu_physical_apicid
 #define safe_smp_processor_id()			0
-#define stack_smp_processor_id()		0
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 55b4596ef688..d0bb76d81402 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -124,7 +124,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
 extern int __init efi_alloc_page_tables(void);
 extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_update_mappings(void);
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 4ad6560847b1..7511978093eb 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -50,9 +50,9 @@ static inline void local_sub(long i, local_t *l)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int local_sub_and_test(long i, local_t *l)
+static inline bool local_sub_and_test(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
 }
 
 /**
@@ -63,9 +63,9 @@ static inline int local_sub_and_test(long i, local_t *l)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int local_dec_and_test(local_t *l)
+static inline bool local_dec_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
+	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
 }
 
 /**
@@ -76,9 +76,9 @@ static inline int local_dec_and_test(local_t *l)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int local_inc_and_test(local_t *l)
+static inline bool local_inc_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
+	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
 }
 
 /**
@@ -90,9 +90,9 @@ static inline int local_inc_and_test(local_t *l)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int local_add_negative(long i, local_t *l)
+static inline bool local_add_negative(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
 }
 
 /**
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e0ba66ca68c6..e02e3f80d363 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -510,14 +510,15 @@ do { \
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)			\
 ({									\
-	int old__;							\
-	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
-		     : "=r" (old__), "+m" (var)				\
+	bool old__;							\
+	asm volatile("btr %2,"__percpu_arg(1)"\n\t"			\
+		     CC_SET(c)						\
+		     : CC_OUT(c) (old__), "+m" (var)			\
 		     : "dIr" (bit));					\
 	old__;								\
 })
 
-static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {
 	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
@@ -529,14 +530,14 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
 #endif
 }
 
-static inline int x86_this_cpu_variable_test_bit(int nr,
+static inline bool x86_this_cpu_variable_test_bit(int nr,
                         const unsigned long __percpu *addr)
 {
-	int oldbit;
+	bool oldbit;
 
 	asm volatile("bt "__percpu_arg(2)",%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit)
+		     CC_SET(c)
+		     : CC_OUT(c) (oldbit)
 		     : "m" (*(unsigned long *)addr), "Ir" (nr));
 
 	return oldbit;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1a27396b6ea0..2815d268af8b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -480,7 +480,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 
 static inline int pte_none(pte_t pte)
 {
-	return !pte.pte;
+	return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK));
 }
 
 #define __HAVE_ARCH_PTE_SAME
@@ -552,7 +552,8 @@ static inline int pmd_none(pmd_t pmd)
 {
 	/* Only check low word on 32-bit platforms, since it might be
 	   out of sync with upper half. */
-	return (unsigned long)native_pmd_val(pmd) == 0;
+	unsigned long val = native_pmd_val(pmd);
+	return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0;
 }
 
 static inline unsigned long pmd_page_vaddr(pmd_t pmd)
@@ -616,7 +617,7 @@ static inline unsigned long pages_to_mb(unsigned long npg)
 #if CONFIG_PGTABLE_LEVELS > 2
 static inline int pud_none(pud_t pud)
 {
-	return native_pud_val(pud) == 0;
+	return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
 }
 
 static inline int pud_present(pud_t pud)
@@ -694,6 +695,12 @@ static inline int pgd_bad(pgd_t pgd)
 
 static inline int pgd_none(pgd_t pgd)
 {
+	/*
+	 * There is no need to do a workaround for the KNL stray
+	 * A/D bit erratum here.  PGDs only point to page tables
+	 * except on 32-bit non-PAE which is not supported on
+	 * KNL.
+	 */
 	return !native_pgd_val(pgd);
 }
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 2ee781114d34..7e8ec7ae10fa 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -140,18 +140,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
-/* Encode and de-code a swap entry */
+/*
+ * Encode and de-code a swap entry
+ *
+ * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
+ * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
+ * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ *
+ * G (8) is aliased and used as a PROT_NONE indicator for
+ * !present ptes.  We need to start storing swap entries above
+ * there.  We also need to avoid using A and D because of an
+ * erratum where they can be incorrectly set by hardware on
+ * non-present PTEs.
+ */
+#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+/* Place the offset above the type: */
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
-#define __swp_type(x)			(((x).val >> (_PAGE_BIT_PRESENT + 1)) \
+#define __swp_type(x)			(((x).val >> (SWP_TYPE_FIRST_BIT)) \
 					 & ((1U << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)			((x).val >> SWP_OFFSET_SHIFT)
+#define __swp_offset(x)			((x).val >> SWP_OFFSET_FIRST_BIT)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
-					 ((type) << (_PAGE_BIT_PRESENT + 1)) \
-					 | ((offset) << SWP_OFFSET_SHIFT) })
+					 ((type) << (SWP_TYPE_FIRST_BIT)) \
+					 | ((offset) << SWP_OFFSET_FIRST_BIT) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
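
[Example] To see the arithmetic, here is a compile-and-run C sketch of the encode side, assuming _PAGE_BIT_PROTNONE == 8 (the Global bit) as on x86. The point of the move: the encoded entry leaves the low PTE bits, including Accessed (5) and Dirty (6), clear, so the KNL erratum cannot corrupt a swap entry:

    #include <stdio.h>

    #define _PAGE_BIT_PROTNONE	8	/* assumption: aliases Global */
    #define SWP_TYPE_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
    #define SWP_TYPE_BITS	5
    #define SWP_OFFSET_FIRST_BIT	(SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)

    #define __swp_entry(type, offset) \
            (((unsigned long long)(type) << SWP_TYPE_FIRST_BIT) | \
             ((unsigned long long)(offset) << SWP_OFFSET_FIRST_BIT))

    int main(void)
    {
            unsigned long long e = __swp_entry(3, 0x1000);

            /* the low 9 bits (P..G), which include A and D, stay clear */
            printf("entry=%#llx low9=%#llx\n", e, e & 0x1ff);
            return 0;
    }
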
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b5efe264eff..f1218f512f62 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -70,6 +70,12 @@
 			 _PAGE_PKEY_BIT2 | \
 			 _PAGE_PKEY_BIT3)
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+#define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED)
+#else
+#define _PAGE_KNL_ERRATUM_MASK 0
+#endif
+
 #ifdef CONFIG_KMEMCHECK
 #define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
 #else
@@ -475,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 				   unsigned numpages, unsigned long page_flags);
-void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
-			       unsigned numpages);
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index d397deb58146..17f218645701 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
 }
 
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3cc5d3..89314ed74fee 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -371,6 +371,10 @@ extern unsigned int xstate_size;
 
 struct perf_event;
 
+typedef struct {
+	unsigned long		seg;
+} mm_segment_t;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -419,6 +423,11 @@ struct thread_struct {
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned		io_bitmap_max;
 
+	mm_segment_t		addr_limit;
+
+	unsigned int		sig_on_uaccess_err:1;
+	unsigned int		uaccess_err:1;	/* uaccess failed */
+
 	/* Floating point and extended processor state */
 	struct fpu		fpu;
 	/*
@@ -490,11 +499,6 @@ static inline void load_sp0(struct tss_struct *tss,
 #define set_iopl_mask native_set_iopl_mask
 #endif /* CONFIG_PARAVIRT */
 
-typedef struct {
-	unsigned long		seg;
-} mm_segment_t;
-
-
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
@@ -716,6 +720,7 @@ static inline void spin_lock_prefetch(const void *x)
 	.sp0			= TOP_OF_INIT_STACK,	\
 	.sysenter_cs		= __KERNEL_CS,		\
 	.io_bitmap_ptr		= NULL,			\
+	.addr_limit		= KERNEL_DS,		\
 }
 
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
@@ -765,8 +770,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define STACK_TOP		TASK_SIZE
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-	.sp0 = TOP_OF_INIT_STACK \
+	.sp0			= TOP_OF_INIT_STACK,	\
+	.addr_limit		= KERNEL_DS,		\
 }
 
 /*
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 8f7866a5b9a4..661dd305694a 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -1,11 +1,13 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#ifdef CC_HAVE_ASM_GOTO
+#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
+
+/* Use asm goto */
 
 #define __GEN_RMWcc(fullop, var, cc, ...)				\
 do {									\
-	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"		\
+	asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"		\
 			: : "m" (var), ## __VA_ARGS__			\
 			: "memory" : cc_label);				\
 	return 0;							\
@@ -19,15 +21,17 @@ cc_label: \
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
 	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
 
-#else /* !CC_HAVE_ASM_GOTO */
+#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+
+/* Use flags output or a set instruction */
 
 #define __GEN_RMWcc(fullop, var, cc, ...)				\
 do {									\
-	char c;								\
-	asm volatile (fullop "; set" cc " %1"				\
-			: "+m" (var), "=qm" (c)				\
+	bool c;								\
+	asm volatile (fullop ";" CC_SET(cc)				\
+			: "+m" (var), CC_OUT(cc) (c)			\
 			: __VA_ARGS__ : "memory");			\
-	return c != 0;							\
+	return c;							\
 } while (0)
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
@@ -36,6 +40,6 @@ do { \
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
 	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
 
-#endif /* CC_HAVE_ASM_GOTO */
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #endif /* _ASM_X86_RMWcc */
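The new #else branch relies on the CC_SET()/CC_OUT() helpers, which emit a GCC 6+ flag-output constraint ("=@cc<cond>") when __GCC_ASM_FLAG_OUTPUTS__ is defined and fall back to a setcc instruction otherwise. A standalone sketch of the same idea (user-space C, not the kernel macros):

#include <stdbool.h>
#include <stdio.h>

static bool dec_and_test(int *v)
{
	bool c;

#ifdef __GCC_ASM_FLAG_OUTPUTS__
	/* Read ZF directly as an asm output; no setcc needed. */
	asm volatile ("decl %0" : "+m" (*v), "=@cce" (c) : : "memory");
#else
	/* Older compilers: materialize ZF with sete. */
	asm volatile ("decl %0; sete %1"
		      : "+m" (*v), "=qm" (c) : : "memory");
#endif
	return c;
}

int main(void)
{
	int v = 1;

	printf("%d\n", dec_and_test(&v));	/* 1: counter hit zero */
	printf("%d\n", dec_and_test(&v));	/* 0: now -1 */
	return 0;
}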
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 089ced4edbbc..8dbc762ad132 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem)
 /*
  * trylock for reading -- returns 1 if successful, 0 if contention
  */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
+static inline bool __down_read_trylock(struct rw_semaphore *sem)
 {
 	long result, tmp;
 	asm volatile("# beginning __down_read_trylock\n\t"
@@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 		     : "+m" (sem->count), "=&a" (result), "=&r" (tmp)
 		     : "i" (RWSEM_ACTIVE_READ_BIAS)
 		     : "memory", "cc");
-	return result >= 0 ? 1 : 0;
+	return result >= 0;
 }
 
 /*
@@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
+static inline bool __down_write_trylock(struct rw_semaphore *sem)
 {
-	long result, tmp;
+	bool result;
+	long tmp0, tmp1;
 	asm volatile("# beginning __down_write_trylock\n\t"
 		     "  mov          %0,%1\n\t"
 		     "1:\n\t"
@@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 		     /* was the active mask 0 before? */
 		     "  jnz          2f\n\t"
 		     "  mov          %1,%2\n\t"
-		     "  add          %3,%2\n\t"
+		     "  add          %4,%2\n\t"
 		     LOCK_PREFIX "  cmpxchg  %2,%0\n\t"
 		     "  jnz	     1b\n\t"
 		     "2:\n\t"
-		     "  sete         %b1\n\t"
-		     "  movzbl       %b1, %k1\n\t"
+		     CC_SET(e)
 		     "# ending __down_write_trylock\n\t"
-		     : "+m" (sem->count), "=&a" (result), "=&r" (tmp)
+		     : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1),
+		       CC_OUT(e) (result)
 		     : "er" (RWSEM_ACTIVE_WRITE_BIAS)
 		     : "memory", "cc");
 	return result;
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 2138c9ae19ee..dd1e7d6387ab 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
 
 static inline int __gen_sigismember(sigset_t *set, int _sig)
 {
-	int ret;
-	asm("btl %2,%1\n\tsbbl %0,%0"
-	    : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+	unsigned char ret;
+	asm("btl %2,%1\n\tsetc %0"
+	    : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
 	return ret;
 }
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 66b057306f40..0576b6157f3a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -172,12 +172,6 @@ extern int safe_smp_processor_id(void);
 #elif defined(CONFIG_X86_64_SMP)
 #define raw_smp_processor_id() (this_cpu_read(cpu_number))
 
-#define stack_smp_processor_id()					\
-({									\
-	struct thread_info *ti;						\
-	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->cpu;							\
-})
 #define safe_smp_processor_id()		smp_processor_id()
 
 #endif
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index f28a24b51dc7..cbf8847d02a0 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	unsigned char oldbit;
 
-	asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
-		     : "=r" (oldbit), "+m" (ADDR)
+	asm volatile("lock; bts %2,%1\n\tsetc %0"
+		     : "=qm" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
 }
@@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	unsigned char oldbit;
 
-	asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
-		     : "=r" (oldbit), "+m" (ADDR)
+	asm volatile("lock; btr %2,%1\n\tsetc %0"
+		     : "=qm" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
 }
@@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
+	unsigned char oldbit;
 
-	asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
-		     : "=r" (oldbit), "+m" (ADDR)
+	asm volatile("lock; btc %2,%1\n\tsetc %0"
+		     : "=qm" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
 }
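The sbb-to-setc conversion used in all three functions, as a standalone sketch (x86 user-space C, not kernel code): bts/btr/btc leave the old bit in the carry flag, and setc writes that flag into a byte operand, which the "=qm" constraint lets the compiler keep in a byte register or directly in memory instead of a full register:

#include <stdio.h>

static int test_and_set_bit(long nr, volatile unsigned long *addr)
{
	unsigned char oldbit;

	asm volatile("lock; bts %2,%1\n\tsetc %0"
		     : "=qm" (oldbit), "+m" (*addr)
		     : "Ir" (nr) : "memory");
	return oldbit;
}

int main(void)
{
	unsigned long word = 0;

	printf("%d\n", test_and_set_bit(3, &word));	/* 0: bit was clear */
	printf("%d\n", test_and_set_bit(3, &word));	/* 1: already set */
	return 0;
}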
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30c133ac05cd..89bff044a6f5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,9 +57,6 @@ struct thread_info {
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	mm_segment_t		addr_limit;
-	unsigned int		sig_on_uaccess_error:1;
-	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk)			\
@@ -67,7 +64,6 @@ struct thread_info {
 	.task		= &tsk,			\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.addr_limit	= KERNEL_DS,		\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
@@ -186,11 +182,6 @@ static inline unsigned long current_stack_pointer(void)
 # define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
 #endif
 
-/* Load thread_info address into "reg" */
-#define GET_THREAD_INFO(reg) \
-	_ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
-	_ASM_SUB $(THREAD_SIZE),reg ;
-
 /*
  * ASM operand which evaluates to a 'thread_info' address of
  * the current task, if it is known that "reg" is exactly "off"
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2982387ba817..c03bfb68c503 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -29,12 +29,12 @@
 #define USER_DS		MAKE_MM_SEG(TASK_SIZE_MAX)
 
 #define get_ds()	(KERNEL_DS)
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+#define get_fs()	(current->thread.addr_limit)
+#define set_fs(x)	(current->thread.addr_limit = (x))
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
-#define user_addr_max() (current_thread_info()->addr_limit.seg)
+#define user_addr_max() (current->thread.addr_limit.seg)
 #define __addr_ok(addr)	\
 	((unsigned long __force)(addr) < user_addr_max())
 
@@ -342,7 +342,26 @@ do { \
 } while (0)
 
 #ifdef CONFIG_X86_32
-#define __get_user_asm_u64(x, ptr, retval, errret)	(x) = __get_user_bad()
+#define __get_user_asm_u64(x, ptr, retval, errret)			\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:	movl %2,%%eax\n"			\
+		     "2:	movl %3,%%edx\n"			\
+		     "3: " ASM_CLAC "\n"				\
+		     ".section .fixup,\"ax\"\n"				\
+		     "4:	mov %4,%0\n"				\
+		     "	xorl %%eax,%%eax\n"				\
+		     "	xorl %%edx,%%edx\n"				\
+		     "	jmp 3b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 4b)				\
+		     _ASM_EXTABLE(2b, 4b)				\
+		     : "=r" (retval), "=A"(x)				\
+		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
+		       "i" (errret), "0" (retval));			\
+})
+
 #define __get_user_asm_ex_u64(x, ptr)			(x) = __get_user_bad()
 #else
 #define __get_user_asm_u64(x, ptr, retval, errret) \
@@ -429,7 +448,7 @@ do { \
 #define __get_user_nocheck(x, ptr, size)				\
 ({									\
 	int __gu_err;							\
-	unsigned long __gu_val;						\
+	__inttype(*(ptr)) __gu_val;					\
 	__uaccess_begin();						\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
 	__uaccess_end();						\
@@ -468,13 +487,13 @@ struct __large_struct { unsigned long buf[100]; };
  * uaccess_try and catch
  */
 #define uaccess_try	do {						\
-	current_thread_info()->uaccess_err = 0;				\
+	current->thread.uaccess_err = 0;				\
 	__uaccess_begin();						\
 	barrier();
 
 #define uaccess_catch(err)						\
 	__uaccess_end();						\
-	(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);	\
+	(err) |= (current->thread.uaccess_err ? -EFAULT : 0);		\
 } while (0)
 
 /**
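With addr_limit in thread_struct, the get_fs()/set_fs() API itself is unchanged. A kernel-context sketch of the usual caller pattern that keeps working (hypothetical helper, not from the patch):

static ssize_t kernel_read_helper(struct file *file, void *buf,
				  size_t count, loff_t *pos)
{
	mm_segment_t old_fs = get_fs();
	ssize_t ret;

	set_fs(KERNEL_DS);	/* allow "user" accesses to kernel memory */
	ret = vfs_read(file, (__force char __user *)buf, count, pos);
	set_fs(old_fs);		/* always restore the saved limit */

	return ret;
}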
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 2b19caa4081c..32712a925f26 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -26,6 +26,8 @@
 #  define __ARCH_WANT_COMPAT_SYS_GETDENTS64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
+#  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
+#  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
 
 # endif
 
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 674134e9f5e5..2bd5c6ff7ee7 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -31,7 +31,9 @@ void common(void) {
 	BLANK();
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
-	OFFSET(TI_addr_limit, thread_info, addr_limit);
+
+	BLANK();
+	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0fe6953f421c..d22a7b9c4f0e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1452,7 +1452,7 @@ void cpu_init(void)
 	struct task_struct *me;
 	struct tss_struct *t;
 	unsigned long v;
-	int cpu = stack_smp_processor_id();
+	int cpu = raw_smp_processor_id();
 	int i;
 
 	wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index f6f50c4ceaec..cfa97ff67bda 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup);
  */
 #define SANITY_CHECK_LOOPS 8
 
+#ifdef CONFIG_ARCH_RANDOM
 void x86_init_rdrand(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_ARCH_RANDOM
 	unsigned long tmp;
 	int i;
 
@@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
 			return;
 		}
 	}
-#endif
 }
+#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ef8017ca5ba9..de8242d8bb61 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -87,7 +87,7 @@ static inline int valid_stack_ptr(struct task_struct *task,
 	else
 		return 0;
 	}
-	return p > t && p < t + THREAD_SIZE - size;
+	return p >= t && p < t + THREAD_SIZE - size;
 }
 
 unsigned long
@@ -98,6 +98,14 @@ print_context_stack(struct task_struct *task,
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
+	/*
+	 * If we overflowed the stack into a guard page, jump back to the
+	 * bottom of the usable stack.
+	 */
+	if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
+	    PAGE_SIZE)
+		stack = (unsigned long *)task_stack_page(task);
+
 	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
 		unsigned long addr;
 
@@ -226,6 +234,8 @@ unsigned long oops_begin(void)
 EXPORT_SYMBOL_GPL(oops_begin);
 NOKPROBE_SYMBOL(oops_begin);
 
+void __noreturn rewind_stack_do_exit(int signr);
+
 void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
 	if (regs && kexec_should_crash(current))
@@ -247,7 +257,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
 		panic("Fatal exception");
-	do_exit(signr);
+
+	/*
+	 * We're not going to return, but we might be on an IST stack or
+	 * have very little stack space left.  Rewind the stack and kill
+	 * the task.
+	 */
+	rewind_stack_do_exit(signr);
 }
 NOKPROBE_SYMBOL(oops_end);
 
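The guard-page clamp added to print_context_stack() can be modeled in isolation (standalone sketch with made-up addresses, not kernel code). Note that the unsigned subtraction deliberately wraps for pointers at or above the stack base, so only pointers inside the guard page just below it get snapped back:

#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long clamp_to_stack(unsigned long base, unsigned long sp)
{
	/* If sp >= base, base - sp wraps to a huge value: no clamp. */
	if (base - sp < PAGE_SIZE)	/* sp fell into the guard page */
		sp = base;
	return sp;
}

int main(void)
{
	unsigned long base = 0x10000;

	printf("%#lx\n", clamp_to_stack(base, base - 8));	/* -> 0x10000 */
	printf("%#lx\n", clamp_to_stack(base, base + 64));	/* unchanged */
	return 0;
}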
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index d558a8a49016..2552a1eadfed 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -272,6 +272,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
+		unsigned long word;
+
 		if (stack >= irq_stack && stack <= irq_stack_end) {
 			if (stack == irq_stack_end) {
 				stack = (unsigned long *) (irq_stack_end[-1]);
@@ -281,12 +283,18 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			if (kstack_end(stack))
 				break;
 		}
+
+		if (probe_kernel_address(stack, word))
+			break;
+
 		if ((i % STACKSLOTS_PER_LINE) == 0) {
 			if (i != 0)
 				pr_cont("\n");
-			printk("%s %016lx", log_lvl, *stack++);
+			printk("%s %016lx", log_lvl, word);
 		} else
-			pr_cont(" %016lx", *stack++);
+			pr_cont(" %016lx", word);
+
+		stack++;
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
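probe_kernel_address() copies one word with the page-fault handler armed and returns -EFAULT instead of oopsing when the address is unmapped, which is what lets the dumper walk possibly-stale stack pointers safely. A kernel-context sketch of the same guarded-read pattern (hypothetical helper):

static void dump_word_safely(const unsigned long *p)
{
	unsigned long word;

	if (probe_kernel_address(p, word))	/* 0 on success, -EFAULT on fault */
		pr_cont(" (unreadable)");
	else
		pr_cont(" %016lx", word);
}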
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 64341aa485ae..d40ee8a38fed 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(___preempt_schedule);
 EXPORT_SYMBOL(___preempt_schedule_notrace);
 #endif
+
+EXPORT_SYMBOL(__sw_hweight32);
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index dc3c0b1c816f..b44564bf86a8 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -1,11 +1,104 @@
 #include <linux/compat.h>
 #include <linux/uaccess.h>
 
+/*
+ * The compat_siginfo_t structure and handling code is very easy
+ * to break in several ways.  It must always be updated when new
+ * updates are made to the main siginfo_t, and
+ * copy_siginfo_to_user32() must be updated when the
+ * (arch-independent) copy_siginfo_to_user() is updated.
+ *
+ * It is also easy to put a new member in the compat_siginfo_t
+ * which has implicit alignment which can move internal structure
+ * alignment around breaking the ABI.  This can happen if you,
+ * for instance, put a plain 64-bit value in there.
+ */
+static inline void signal_compat_build_tests(void)
+{
+	int _sifields_offset = offsetof(compat_siginfo_t, _sifields);
+
+	/*
+	 * If adding a new si_code, there is probably new data in
+	 * the siginfo.  Make sure folks bumping the si_code
+	 * limits also have to look at this code.  Make sure any
+	 * new fields are handled in copy_siginfo_to_user32()!
+	 */
+	BUILD_BUG_ON(NSIGILL  != 8);
+	BUILD_BUG_ON(NSIGFPE  != 8);
+	BUILD_BUG_ON(NSIGSEGV != 4);
+	BUILD_BUG_ON(NSIGBUS  != 5);
+	BUILD_BUG_ON(NSIGTRAP != 4);
+	BUILD_BUG_ON(NSIGCHLD != 6);
+	BUILD_BUG_ON(NSIGSYS  != 1);
+
+	/* This is part of the ABI and can never change in size: */
+	BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128);
+	/*
+	 * The offsets of all the (unioned) si_fields are fixed
+	 * in the ABI, of course.  Make sure none of them ever
+	 * move and are always at the beginning:
+	 */
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
+#define CHECK_CSI_OFFSET(name)	BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+
+	/*
+	 * Ensure that the size of each si_field never changes.
+	 * If it does, it is a sign that the
+	 * copy_siginfo_to_user32() code below needs to be updated
+	 * along with the size in the CHECK_SI_SIZE().
+	 *
+	 * We repeat this check for both the generic and compat
+	 * siginfos.
+	 *
+	 * Note: it is OK for these to grow as long as the whole
+	 * structure stays within the padding size (checked
+	 * above).
+	 */
+#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name))
+#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name))
+
+	CHECK_CSI_OFFSET(_kill);
+	CHECK_CSI_SIZE  (_kill, 2*sizeof(int));
+	CHECK_SI_SIZE   (_kill, 2*sizeof(int));
+
+	CHECK_CSI_OFFSET(_timer);
+	CHECK_CSI_SIZE  (_timer, 5*sizeof(int));
+	CHECK_SI_SIZE   (_timer, 6*sizeof(int));
+
+	CHECK_CSI_OFFSET(_rt);
+	CHECK_CSI_SIZE  (_rt, 3*sizeof(int));
+	CHECK_SI_SIZE   (_rt, 4*sizeof(int));
+
+	CHECK_CSI_OFFSET(_sigchld);
+	CHECK_CSI_SIZE  (_sigchld, 5*sizeof(int));
+	CHECK_SI_SIZE   (_sigchld, 8*sizeof(int));
+
+	CHECK_CSI_OFFSET(_sigchld_x32);
+	CHECK_CSI_SIZE  (_sigchld_x32, 7*sizeof(int));
+	/* no _sigchld_x32 in the generic siginfo_t */
+
+	CHECK_CSI_OFFSET(_sigfault);
+	CHECK_CSI_SIZE  (_sigfault, 4*sizeof(int));
+	CHECK_SI_SIZE   (_sigfault, 8*sizeof(int));
+
+	CHECK_CSI_OFFSET(_sigpoll);
+	CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
+	CHECK_SI_SIZE   (_sigpoll, 4*sizeof(int));
+
+	CHECK_CSI_OFFSET(_sigsys);
+	CHECK_CSI_SIZE  (_sigsys, 3*sizeof(int));
+	CHECK_SI_SIZE   (_sigsys, 4*sizeof(int));
+
+	/* any new si_fields should be added here */
+}
+
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err = 0;
 	bool ia32 = test_thread_flag(TIF_IA32);
 
+	signal_compat_build_tests();
+
 	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
 		return -EFAULT;
 
@@ -32,6 +125,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 					  &to->_sifields._pad[0]);
 		switch (from->si_code >> 16) {
 		case __SI_FAULT >> 16:
+			if (from->si_signo == SIGBUS &&
+			    (from->si_code == BUS_MCEERR_AR ||
+			     from->si_code == BUS_MCEERR_AO))
+				put_user_ex(from->si_addr_lsb, &to->si_addr_lsb);
+
+			if (from->si_signo == SIGSEGV) {
+				if (from->si_code == SEGV_BNDERR) {
+					compat_uptr_t lower = (unsigned long)&to->si_lower;
+					compat_uptr_t upper = (unsigned long)&to->si_upper;
+					put_user_ex(lower, &to->si_lower);
+					put_user_ex(upper, &to->si_upper);
+				}
+				if (from->si_code == SEGV_PKUERR)
+					put_user_ex(from->si_pkey, &to->si_pkey);
+			}
 			break;
 		case __SI_SYS >> 16:
 			put_user_ex(from->si_syscall, &to->si_syscall);
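The BUILD_BUG_ON() checks added above cost nothing at run time; the whole build-tests function compiles down to an empty body. A standalone sketch of the underlying trick (simplified; the kernel's macro is more elaborate): a false compile-time condition produces a negative array size, which is a compile error.

#define MY_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

struct abi_struct {
	int a;
	long b;
};

static inline void abi_checks(void)
{
	MY_BUILD_BUG_ON(sizeof(struct abi_struct) > 16);	/* compiles */
	/* MY_BUILD_BUG_ON(sizeof(struct abi_struct) > 8); would not */
}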
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2ed0ec1353f8..d0a51939c150 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1292,7 +1292,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	cpumask_copy(cpu_callin_mask, cpumask_of(0));
 	mb();
 
-	current_thread_info()->cpu = 0;  /* needed? */
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 3dce1ca0a653..01f30e56f99e 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
 
 static inline int is_revectored(int nr, struct revectored_struct *bitmap)
 {
-	__asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
-		:"=r" (nr)
-		:"m" (*bitmap), "r" (nr));
-	return nr;
+	return test_bit(nr, bitmap->__map);
 }
 
 #define val_byte(val, n) (((__u8 *)&val)[n])
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index cd05942bc918..f1aebfb49c36 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
 
 EXPORT_SYMBOL(csum_partial);
 
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
 /*
  * Export string functions. We normally rely on gcc builtin for most of these,
  * but gcc sometimes decides not to inline them.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a576752a7e..ec969cc3eb20 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 
-obj-y += msr.o msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 2b0ef26da0bd..bf603ebbfd8e 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,11 +17,11 @@
 
 /* Standard copy_to_user with segment limit checking */
 ENTRY(_copy_to_user)
-	GET_THREAD_INFO(%rax)
+	mov PER_CPU_VAR(current_task), %rax
 	movq %rdi,%rcx
 	addq %rdx,%rcx
 	jc bad_to_user
-	cmpq TI_addr_limit(%rax),%rcx
+	cmpq TASK_addr_limit(%rax),%rcx
 	ja bad_to_user
 	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
 		      "jmp copy_user_generic_string",		\
@@ -32,11 +32,11 @@ ENDPROC(_copy_to_user)
 
 /* Standard copy_from_user with segment limit checking */
 ENTRY(_copy_from_user)
-	GET_THREAD_INFO(%rax)
+	mov PER_CPU_VAR(current_task), %rax
 	movq %rsi,%rcx
 	addq %rdx,%rcx
 	jc bad_from_user
-	cmpq TI_addr_limit(%rax),%rcx
+	cmpq TASK_addr_limit(%rax),%rcx
 	ja bad_from_user
 	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
 		      "jmp copy_user_generic_string",		\
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 28a6654f0d08..b6fcb9a9ddbc 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -6,6 +6,7 @@
  */
 #include <asm/checksum.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/smap.h>
 
 /**
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 46668cda4ffd..0ef5128c2de8 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,8 +35,8 @@
 
 	.text
 ENTRY(__get_user_1)
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 1:	movzbl (%_ASM_AX),%edx
@@ -48,8 +48,8 @@ ENDPROC(__get_user_1)
 ENTRY(__get_user_2)
 	add $1,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 2:	movzwl -1(%_ASM_AX),%edx
@@ -61,8 +61,8 @@ ENDPROC(__get_user_2)
 ENTRY(__get_user_4)
 	add $3,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 3:	movl -3(%_ASM_AX),%edx
@@ -75,8 +75,8 @@ ENTRY(__get_user_8)
 #ifdef CONFIG_X86_64
 	add $7,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 4:	movq -7(%_ASM_AX),%rdx
@@ -86,8 +86,8 @@ ENTRY(__get_user_8)
 #else
 	add $7,%_ASM_AX
 	jc bad_get_user_8
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user_8
 	ASM_STAC
 4:	movl -7(%_ASM_AX),%edx
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
new file mode 100644
index 000000000000..02de3d74d2c5
--- /dev/null
+++ b/arch/x86/lib/hweight.S
@@ -0,0 +1,77 @@
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+
+/*
+ * unsigned int __sw_hweight32(unsigned int w)
+ * %rdi: w
+ */
+ENTRY(__sw_hweight32)
+
+#ifdef CONFIG_X86_64
+	movl %edi, %eax				# w
+#endif
+	__ASM_SIZE(push,) %__ASM_REG(dx)
+	movl %eax, %edx				# w -> t
+	shrl %edx				# t >>= 1
+	andl $0x55555555, %edx			# t &= 0x55555555
+	subl %edx, %eax				# w -= t
+
+	movl %eax, %edx				# w -> t
+	shrl $2, %eax				# w_tmp >>= 2
+	andl $0x33333333, %edx			# t &= 0x33333333
+	andl $0x33333333, %eax			# w_tmp &= 0x33333333
+	addl %edx, %eax				# w = w_tmp + t
+
+	movl %eax, %edx				# w -> t
+	shrl $4, %edx				# t >>= 4
+	addl %edx, %eax				# w_tmp += t
+	andl $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
+	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
+	shrl $24, %eax				# w = w_tmp >> 24
+	__ASM_SIZE(pop,) %__ASM_REG(dx)
+	ret
+ENDPROC(__sw_hweight32)
+
+ENTRY(__sw_hweight64)
+#ifdef CONFIG_X86_64
+	pushq %rdx
+
+	movq %rdi, %rdx				# w -> t
+	movabsq $0x5555555555555555, %rax
+	shrq %rdx				# t >>= 1
+	andq %rdx, %rax				# t &= 0x5555555555555555
+	movabsq $0x3333333333333333, %rdx
+	subq %rax, %rdi				# w -= t
+
+	movq %rdi, %rax				# w -> t
+	shrq $2, %rdi				# w_tmp >>= 2
+	andq %rdx, %rax				# t &= 0x3333333333333333
+	andq %rdi, %rdx				# w_tmp &= 0x3333333333333333
+	addq %rdx, %rax				# w = w_tmp + t
+
+	movq %rax, %rdx				# w -> t
+	shrq $4, %rdx				# t >>= 4
+	addq %rdx, %rax				# w_tmp += t
+	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
+	andq %rdx, %rax				# w_tmp &= 0x0f0f0f0f0f0f0f0f
+	movabsq $0x0101010101010101, %rdx
+	imulq %rdx, %rax			# w_tmp *= 0x0101010101010101
+	shrq $56, %rax				# w = w_tmp >> 56
+
+	popq %rdx
+	ret
+#else /* CONFIG_X86_32 */
+	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+	pushl %ecx
+
+	call __sw_hweight32
+	movl %eax, %ecx				# stash away result
+	movl %edx, %eax				# second part of input
+	call __sw_hweight32
+	addl %ecx, %eax				# result
+
+	popl %ecx
+	ret
+#endif
+ENDPROC(__sw_hweight64)
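For comparison with the assembly above, the same SWAR (SIMD-within-a-register) reduction in C (standalone sketch, not the generic lib/hweight.c):

#include <stdint.h>
#include <stdio.h>

static unsigned int sw_hweight32(uint32_t w)
{
	uint32_t t = (w >> 1) & 0x55555555;	/* half of each 2-bit pair */

	w -= t;					/* per-pair bit counts */
	w = (w & 0x33333333) + ((w >> 2) & 0x33333333);	/* 4-bit sums */
	w = (w + (w >> 4)) & 0x0f0f0f0f;	/* 8-bit sums */
	return (w * 0x01010101) >> 24;		/* add the four bytes */
}

int main(void)
{
	printf("%u\n", sw_hweight32(0xdeadbeef));	/* prints 24 */
	return 0;
}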
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index e0817a12d323..c891ece81e5b 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -29,14 +29,14 @@
  * as they get called from within inline assembly.
  */
 
-#define ENTER	GET_THREAD_INFO(%_ASM_BX)
+#define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
 #define EXIT	ASM_CLAC ;	\
 	ret
 
 .text
 ENTRY(__put_user_1)
 	ENTER
-	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
+	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
@@ -46,7 +46,7 @@ ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
@@ -58,7 +58,7 @@ ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
@@ -70,7 +70,7 @@ ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0a42327a59d7..9f760cdcaf40 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -6,7 +6,7 @@
  * Copyright 2002 Andi Kleen <ak@suse.de>
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Zero Userspace
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 4bb53b89f3c5..0f90cc218d04 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -37,7 +37,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
 		    struct pt_regs *regs, int trapnr)
 {
 	/* Special hack for uaccess_err */
-	current_thread_info()->uaccess_err = 1;
+	current->thread.uaccess_err = 1;
 	regs->ip = ex_fixup_addr(fixup);
 	return true;
 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d1fa7cd2374..d22161ab941d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
-	pgd = pgd_offset(current->active_mm, address);
+	pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
@@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	 * In this case we need to make sure we're not recursively
 	 * faulting through the emulate_vsyscall() logic.
 	 */
-	if (current_thread_info()->sig_on_uaccess_error && signal) {
+	if (current->thread.sig_on_uaccess_err && signal) {
 		tsk->thread.trap_nr = X86_TRAP_PF;
 		tsk->thread.error_code = error_code | PF_USER;
 		tsk->thread.cr2 = address;
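Reading the pgd base from CR3 rather than current->active_mm makes the vmalloc-fault walk match the page tables the CPU is actually using, which can differ from active_mm (for example when the fault interrupts a context switch). A kernel-context sketch of the idiom (hypothetical helper; read_cr3() as it existed at the time):

static pgd_t *active_pgd(unsigned long address)
{
	/* CR3 holds the physical pgd base; __va() maps it back. */
	pgd_t *base = (pgd_t *)__va(read_cr3());

	return base + pgd_index(address);
}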
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bce2e5d9edd4..e14f87057c3f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -354,7 +354,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 	 * pagetable pages as RO. So assume someone who pre-setup
 	 * these mappings are more intelligent.
 	 */
-	if (pte_val(*pte)) {
+	if (!pte_none(*pte)) {
 		if (!after_bootmem)
 			pages++;
 		continue;
@@ -396,7 +396,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			continue;
 		}
 
-		if (pmd_val(*pmd)) {
+		if (!pmd_none(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
 				pte = (pte_t *)pmd_page_vaddr(*pmd);
@@ -470,7 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			continue;
 		}
 
-		if (pud_val(*pud)) {
+		if (!pud_none(*pud)) {
 			if (!pud_large(*pud)) {
 				pmd = pmd_offset(pud, 0);
 				last_map_addr = phys_pmd_init(pmd, addr, end,
@@ -673,7 +673,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
 
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		pte = pte_start + i;
-		if (pte_val(*pte))
+		if (!pte_none(*pte))
 			return;
 	}
 
@@ -691,7 +691,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd = pmd_start + i;
-		if (pmd_val(*pmd))
+		if (!pmd_none(*pmd))
 			return;
 	}
 
@@ -702,27 +702,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 	spin_unlock(&init_mm.page_table_lock);
 }
 
-/* Return true if pgd is changed, otherwise return false. */
-static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
-{
-	pud_t *pud;
-	int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++) {
-		pud = pud_start + i;
-		if (pud_val(*pud))
-			return false;
-	}
-
-	/* free a pud table */
-	free_pagetable(pgd_page(*pgd), 0);
-	spin_lock(&init_mm.page_table_lock);
-	pgd_clear(pgd);
-	spin_unlock(&init_mm.page_table_lock);
-
-	return true;
-}
-
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 		 bool direct)
@@ -913,7 +892,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	unsigned long addr;
 	pgd_t *pgd;
 	pud_t *pud;
-	bool pgd_changed = false;
 
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
@@ -924,13 +902,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 
 		pud = (pud_t *)pgd_page_vaddr(*pgd);
 		remove_pud_table(pud, addr, next, direct);
-		if (free_pud_table(pud, pgd))
-			pgd_changed = true;
 	}
 
-	if (pgd_changed)
-		sync_global_pgds(start, end - 1, 1);
-
 	flush_tlb_all();
 }
 
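The p*_val() to p*_none() conversions matter because, with the erratum handled above, hardware may set the Accessed/Dirty bits in otherwise-empty entries, so a nonzero raw value no longer implies "in use". A sketch of an emptiness test that masks those bits (an assumption about the pgtable.h change earlier in this series, not quoted from it):

static inline int pte_none_sketch(pte_t pte)
{
	/* Treat stray Accessed/Dirty bits as "still empty": */
	return !(pte_val(pte) & ~_PAGE_KNL_ERRATUM_MASK);
}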
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7a1f7bbf4105..47870a534877 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -746,18 +746,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
 	return true;
 }
 
-static bool try_to_free_pud_page(pud_t *pud)
-{
-	int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++)
-		if (!pud_none(pud[i]))
-			return false;
-
-	free_page((unsigned long)pud);
-	return true;
-}
-
 static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, start);
@@ -871,16 +859,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 	 */
 }
 
-static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
-{
-	pgd_t *pgd_entry = root + pgd_index(addr);
-
-	unmap_pud_range(pgd_entry, addr, end);
-
-	if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
-		pgd_clear(pgd_entry);
-}
-
 static int alloc_pte_page(pmd_t *pmd)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -1113,7 +1091,12 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
 	if (ret < 0) {
-		unmap_pgd_range(cpa->pgd, addr,
+		/*
+		 * Leave the PUD page in place in case some other CPU or thread
+		 * already found it, but remove any useless entries we just
+		 * added to it.
+		 */
+		unmap_pud_range(pgd_entry, addr,
 				addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
 	}
@@ -1185,7 +1168,7 @@ repeat:
 		return __cpa_process_fault(cpa, address, primary);
 
 	old_pte = *kpte;
-	if (!pte_val(old_pte))
+	if (pte_none(old_pte))
 		return __cpa_process_fault(cpa, address, primary);
 
 	if (level == PG_LEVEL_4K) {
@@ -1991,12 +1974,6 @@ out:
 	return retval;
 }
 
-void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
-			       unsigned numpages)
-{
-	unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
-}
-
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index fb0604f11eec..db00e3e2f3dc 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -755,11 +755,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 		return 1;
 
 	while (cursor < to) {
-		if (!devmem_is_allowed(pfn)) {
-			pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
-				current->comm, from, to - 1);
+		if (!devmem_is_allowed(pfn))
 			return 0;
-		}
 		cursor += PAGE_SIZE;
 		pfn++;
 	}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 75cc0978d45d..e67ae0e6c59d 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (pte_val(pteval))
+	if (!pte_none(pteval))
 		set_pte_at(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d898b334ff46..17c8bbd4e2f0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -963,8 +963,6 @@ static void __init __efi_enter_virtual_mode(void)
 	 * EFI mixed mode we need all of memory to be accessible when
 	 * we pass parameters to the EFI runtime services in the
 	 * thunking code.
-	 *
-	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
 	 */
 	free_pages((unsigned long)new_memmap, pg_shift);
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 338402b91d2e..cef39b097649 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	return 0;
 }
-void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
-{
-}
 
 void __init efi_map_region(efi_memory_desc_t *md)
 {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 5cb4301c4dcf..3e12c44f88a2 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	return 0;
 }
 
-void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
-{
-	kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
-}
-
 static void __init __map_region(efi_memory_desc_t *md, u64 va)
 {
 	unsigned long flags = _PAGE_RW;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 760789ae8562..0f87db2cc6a8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 
 	preempt_disable();
 
-	pagefault_disable();	/* Avoid warnings due to being atomic. */
-	__get_user(dummy, (unsigned char __user __force *)v);
-	pagefault_enable();
+	probe_kernel_read(&dummy, v, 1);
 
 	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
 		BUG();
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 71025c2f6bbb..d633974e7f8b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -66,12 +66,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	u64 cursor = from;
 
 	while (cursor < to) {
-		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO
-				"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
-				current->comm, from, to);
+		if (!devmem_is_allowed(pfn))
 			return 0;
-		}
 		cursor += PAGE_SIZE;
 		pfn++;
 	}
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 5edee645d890..262285e48a09 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -21,7 +21,7 @@
 #include <linux/isapnp.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern struct pnp_protocol isapnp_protocol;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 933b53a375b4..66215a7b17cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
 	return do_compat_preadv64(fd, vec, vlen, pos, 0);
 }
 
+#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
+COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos, int, flags)
+{
+	return do_compat_preadv64(fd, vec, vlen, pos, flags);
+}
+#endif
+
 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
 		const struct compat_iovec __user *,vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
@@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
 	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
 }
 
+#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos, int, flags)
+{
+	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+}
+#endif
+
 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
 		const struct compat_iovec __user *,vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d259274238db..d9aef2a0ec8e 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -31,6 +31,19 @@ static inline void user_exit(void)
 	context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+	if (context_tracking_is_enabled())
+		__context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+	if (context_tracking_is_enabled())
+		__context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
 	enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
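A kernel-context sketch of how the new _irqoff variants are meant to be called (hypothetical caller, patterned on the x86 entry paths, which already run with interrupts disabled and so can skip the irq save/restore that user_enter()/user_exit() perform):

static void prepare_exit_to_usermode_sketch(void)
{
	/* ...last-minute exit work, IRQs still disabled... */

	user_enter_irqoff();	/* mark the transition back to user mode */
}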
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ca3e517980a0..917f2b6a0cde 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -594,6 +594,9 @@ struct vm_special_mapping {
594 int (*fault)(const struct vm_special_mapping *sm, 594 int (*fault)(const struct vm_special_mapping *sm,
595 struct vm_area_struct *vma, 595 struct vm_area_struct *vma,
596 struct vm_fault *vmf); 596 struct vm_fault *vmf);
597
598 int (*mremap)(const struct vm_special_mapping *sm,
599 struct vm_area_struct *new_vma);
597}; 600};
598 601
599enum tlb_flush_reason { 602enum tlb_flush_reason {
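A hedged sketch of how a special mapping might use the new hook; the names here are hypothetical (in this series the real consumer is the x86 vdso, which uses the callback to track its location across mremap()):

	static unsigned long demo_mapping_addr;	/* cached current location */

	static int demo_mremap(const struct vm_special_mapping *sm,
			       struct vm_area_struct *new_vma)
	{
		demo_mapping_addr = new_vma->vm_start;	/* learn the new address */
		return 0;				/* 0 == allow the move */
	}

	static const struct vm_special_mapping demo_mapping = {
		.name	= "[demo]",
		.mremap	= demo_mremap,
	};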
diff --git a/include/linux/random.h b/include/linux/random.h
index e47e533742b5..3d6e9815cd85 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
95#ifdef CONFIG_ARCH_RANDOM 95#ifdef CONFIG_ARCH_RANDOM
96# include <asm/archrandom.h> 96# include <asm/archrandom.h>
97#else 97#else
98static inline int arch_get_random_long(unsigned long *v) 98static inline bool arch_get_random_long(unsigned long *v)
99{ 99{
100 return 0; 100 return 0;
101} 101}
102static inline int arch_get_random_int(unsigned int *v) 102static inline bool arch_get_random_int(unsigned int *v)
103{ 103{
104 return 0; 104 return 0;
105} 105}
106static inline int arch_has_random(void) 106static inline bool arch_has_random(void)
107{ 107{
108 return 0; 108 return 0;
109} 109}
110static inline int arch_get_random_seed_long(unsigned long *v) 110static inline bool arch_get_random_seed_long(unsigned long *v)
111{ 111{
112 return 0; 112 return 0;
113} 113}
114static inline int arch_get_random_seed_int(unsigned int *v) 114static inline bool arch_get_random_seed_int(unsigned int *v)
115{ 115{
116 return 0; 116 return 0;
117} 117}
118static inline int arch_has_random_seed(void) 118static inline bool arch_has_random_seed(void)
119{ 119{
120 return 0; 120 return 0;
121} 121}
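The stubs now return bool to match the bool-returning arch implementations (see asm/archrandom.h in this series); the calling convention is unchanged: a true return means *v was filled in. A minimal caller sketch (the function and its fallback are illustrative):

	static unsigned long example_get_seed(void)
	{
		unsigned long v;

		if (arch_get_random_seed_long(&v))
			return v;	/* hardware supplied a seed */
		return 0;		/* caller mixes in other entropy */
	}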
diff --git a/lib/Makefile b/lib/Makefile
index ff6a7a6c6395..07d06a8b9788 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
15KCOV_INSTRUMENT_list_debug.o := n 15KCOV_INSTRUMENT_list_debug.o := n
16KCOV_INSTRUMENT_debugobjects.o := n 16KCOV_INSTRUMENT_debugobjects.o := n
17KCOV_INSTRUMENT_dynamic_debug.o := n 17KCOV_INSTRUMENT_dynamic_debug.o := n
18# Kernel does not boot if we instrument this file as it uses custom calling
19# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
20KCOV_INSTRUMENT_hweight.o := n
21 18
22lib-y := ctype.o string.o vsprintf.o cmdline.o \ 19lib-y := ctype.o string.o vsprintf.o cmdline.o \
23 rbtree.o radix-tree.o dump_stack.o timerqueue.o\ 20 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
74obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o 71obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
75obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o 72obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
76 73
77GCOV_PROFILE_hweight.o := n
78CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
79obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 74obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
80 75
81obj-$(CONFIG_BTREE) += btree.o 76obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c66da508cbf7..eca88087fa8a 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -14,9 +14,9 @@
14#include <linux/bug.h> 14#include <linux/bug.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/uaccess.h>
17 18
18#include <asm/page.h> 19#include <asm/page.h>
19#include <asm/uaccess.h>
20 20
21/* 21/*
22 * bitmaps provide an array of bits, implemented using an an 22 * bitmaps provide an array of bits, implemented using an an
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221558..43273a7d83cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12#ifndef __HAVE_ARCH_SW_HWEIGHT
12unsigned int __sw_hweight32(unsigned int w) 13unsigned int __sw_hweight32(unsigned int w)
13{ 14{
14#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER 15#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
25#endif 26#endif
26} 27}
27EXPORT_SYMBOL(__sw_hweight32); 28EXPORT_SYMBOL(__sw_hweight32);
29#endif
28 30
29unsigned int __sw_hweight16(unsigned int w) 31unsigned int __sw_hweight16(unsigned int w)
30{ 32{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
43} 45}
44EXPORT_SYMBOL(__sw_hweight8); 46EXPORT_SYMBOL(__sw_hweight8);
45 47
48#ifndef __HAVE_ARCH_SW_HWEIGHT
46unsigned long __sw_hweight64(__u64 w) 49unsigned long __sw_hweight64(__u64 w)
47{ 50{
48#if BITS_PER_LONG == 32 51#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
65#endif 68#endif
66} 69}
67EXPORT_SYMBOL(__sw_hweight64); 70EXPORT_SYMBOL(__sw_hweight64);
71#endif
diff --git a/mm/mmap.c b/mm/mmap.c
index de2c1769cc68..234edffec1d0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2943,9 +2943,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
2943 return ((struct vm_special_mapping *)vma->vm_private_data)->name; 2943 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
2944} 2944}
2945 2945
2946static int special_mapping_mremap(struct vm_area_struct *new_vma)
2947{
2948 struct vm_special_mapping *sm = new_vma->vm_private_data;
2949
2950 if (sm->mremap)
2951 return sm->mremap(sm, new_vma);
2952 return 0;
2953}
2954
2946static const struct vm_operations_struct special_mapping_vmops = { 2955static const struct vm_operations_struct special_mapping_vmops = {
2947 .close = special_mapping_close, 2956 .close = special_mapping_close,
2948 .fault = special_mapping_fault, 2957 .fault = special_mapping_fault,
2958 .mremap = special_mapping_mremap,
2949 .name = special_mapping_name, 2959 .name = special_mapping_name,
2950}; 2960};
2951 2961
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index c73425de3cfe..4f747ee07f10 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,8 +4,8 @@ include ../lib.mk
4 4
5.PHONY: all all_32 all_64 warn_32bit_failure clean 5.PHONY: all all_32 all_64 warn_32bit_failure clean
6 6
7TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ 7TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
8 check_initial_reg_state sigreturn ldt_gdt iopl 8 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
9TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 9TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
10 test_FCMOV test_FCOMI test_FISTTP \ 10 test_FCMOV test_FCOMI test_FISTTP \
11 vdso_restorer 11 vdso_restorer
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
new file mode 100644
index 000000000000..9230981f2e12
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-debug.h
@@ -0,0 +1,14 @@
1#ifndef _MPX_DEBUG_H
2#define _MPX_DEBUG_H
3
4#ifndef DEBUG_LEVEL
5#define DEBUG_LEVEL 0
6#endif
7#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
8#define dprintf1(args...) dprintf_level(1, args)
9#define dprintf2(args...) dprintf_level(2, args)
10#define dprintf3(args...) dprintf_level(3, args)
11#define dprintf4(args...) dprintf_level(4, args)
12#define dprintf5(args...) dprintf_level(5, args)
13
14#endif /* _MPX_DEBUG_H */
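DEBUG_LEVEL is a compile-time gate, so verbosity is fixed when the tests are built. A hypothetical consumer, built with e.g. cc -DDEBUG_LEVEL=2:

	#include <stdio.h>
	#include "mpx-debug.h"

	int main(void)
	{
		dprintf1("printed: level 1 <= DEBUG_LEVEL\n");
		dprintf3("not printed: level 3 > DEBUG_LEVEL\n");
		return 0;
	}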
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
new file mode 100644
index 000000000000..ce85356d7e2e
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-dig.c
@@ -0,0 +1,498 @@
1/*
2 * Written by Dave Hansen <dave.hansen@intel.com>
3 */
4
5#include <stdlib.h>
6#include <sys/types.h>
7#include <unistd.h>
8#include <stdio.h>
9#include <errno.h>
10#include <sys/types.h>
11#include <sys/stat.h>
12#include <unistd.h>
13#include <sys/mman.h>
14#include <string.h>
15#include <fcntl.h>
16#include "mpx-debug.h"
17#include "mpx-mm.h"
18#include "mpx-hw.h"
19
20unsigned long bounds_dir_global;
21
22#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
23static inline void __mpx_dig_abort(const char *file, const char *func, int line)
24{
25 fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
26 printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
27 abort();
28}
29
30/*
31 * run like this (BDIR finds the probable bounds directory):
32 *
33 * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
34 * | head -1 | awk -F- '{print $1}')";
35 * ./mpx-dig $pid 0x$BDIR
36 *
37 * NOTE:
38 * assumes that the only 2097152-kb VMA is the bounds dir
39 */
40
41long nr_incore(void *ptr, unsigned long size_bytes)
42{
43 int i;
44 long ret = 0;
45 long vec_len = size_bytes / PAGE_SIZE;
46 unsigned char *vec = malloc(vec_len);
47 int incore_ret;
48
49 if (!vec)
50 mpx_dig_abort();
51
52 incore_ret = mincore(ptr, size_bytes, vec);
53 if (incore_ret) {
54 printf("mincore ret: %d\n", incore_ret);
55 perror("mincore");
56 mpx_dig_abort();
57 }
58 for (i = 0; i < vec_len; i++)
59 ret += vec[i];
60 free(vec);
61 return ret;
62}
63
64int open_proc(int pid, char *file)
65{
66 static char buf[100];
67 int fd;
68
69 snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
70 fd = open(&buf[0], O_RDONLY);
71 if (fd < 0)
72 perror(buf);
73
74 return fd;
75}
76
77struct vaddr_range {
78 unsigned long start;
79 unsigned long end;
80};
81struct vaddr_range *ranges;
82int nr_ranges_allocated;
83int nr_ranges_populated;
84int last_range = -1;
85
86int __pid_load_vaddrs(int pid)
87{
88 int ret = 0;
89 int proc_maps_fd = open_proc(pid, "maps");
90 char linebuf[10000];
91 unsigned long start;
92 unsigned long end;
93 char rest[1000];
94 FILE *f = fdopen(proc_maps_fd, "r");
95
96 if (!f)
97 mpx_dig_abort();
98 nr_ranges_populated = 0;
99 while (!feof(f)) {
100 char *readret = fgets(linebuf, sizeof(linebuf), f);
101 int parsed;
102
103 if (readret == NULL) {
104 if (feof(f))
105 break;
106 mpx_dig_abort();
107 }
108
109 parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
110 if (parsed != 3)
111 mpx_dig_abort();
112
113 dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
114 if (nr_ranges_populated >= nr_ranges_allocated) {
115 ret = -E2BIG;
116 break;
117 }
118 ranges[nr_ranges_populated].start = start;
119 ranges[nr_ranges_populated].end = end;
120 nr_ranges_populated++;
121 }
122 last_range = -1;
123 fclose(f);
124 close(proc_maps_fd);
125 return ret;
126}
127
128int pid_load_vaddrs(int pid)
129{
130 int ret;
131
132 dprintf2("%s(%d)\n", __func__, pid);
133 if (!ranges) {
134 nr_ranges_allocated = 4;
135 ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
136 dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
137 nr_ranges_allocated, ranges);
138 assert(ranges != NULL);
139 }
140 do {
141 ret = __pid_load_vaddrs(pid);
142 if (!ret)
143 break;
144 if (ret == -E2BIG) {
145 dprintf2("%s(%d) need to realloc\n", __func__, pid);
146 nr_ranges_allocated *= 2;
147 ranges = realloc(ranges,
148 nr_ranges_allocated * sizeof(ranges[0]));
149 dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
150 pid, nr_ranges_allocated, ranges);
151 assert(ranges != NULL);
152 dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
153 }
154 } while (1);
155
156 dprintf2("%s(%d) done\n", __func__, pid);
157
158 return ret;
159}
160
161static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
162{
163 if (vaddr < r->start)
164 return 0;
165 if (vaddr >= r->end)
166 return 0;
167 return 1;
168}
169
170static inline int vaddr_mapped_by_range(unsigned long vaddr)
171{
172 int i;
173
174	if (last_range >= 0 && vaddr_in_range(vaddr, &ranges[last_range]))
175 return 1;
176
177 for (i = 0; i < nr_ranges_populated; i++) {
178 struct vaddr_range *r = &ranges[i];
179
180		if (!vaddr_in_range(vaddr, r))
181 continue;
182 last_range = i;
183 return 1;
184 }
185 return 0;
186}
187
188const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
189
190void *read_bounds_table_into_buf(unsigned long table_vaddr)
191{
192#ifdef MPX_DIG_STANDALONE
193 static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
194 off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
195 if (seek_ret != table_vaddr)
196 mpx_dig_abort();
197
198 int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
199 if (read_ret != sizeof(bt_buf))
200 mpx_dig_abort();
201 return &bt_buf;
202#else
203 return (void *)table_vaddr;
204#endif
205}
206
207int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
208 unsigned long bde_vaddr)
209{
210 unsigned long offset_inside_bt;
211 int nr_entries = 0;
212 int do_abort = 0;
213 char *bt_buf;
214
215 dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
216 __func__, base_controlled_vaddr, bde_vaddr);
217
218 bt_buf = read_bounds_table_into_buf(table_vaddr);
219
220 dprintf4("%s() read done\n", __func__);
221
222 for (offset_inside_bt = 0;
223 offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
224 offset_inside_bt += bt_entry_size_bytes) {
225 unsigned long bt_entry_index;
226 unsigned long bt_entry_controls;
227 unsigned long this_bt_entry_for_vaddr;
228 unsigned long *bt_entry_buf;
229 int i;
230
231 dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
232 offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
233 bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
234 if (!bt_buf) {
235 printf("null bt_buf\n");
236 mpx_dig_abort();
237 }
238 if (!bt_entry_buf) {
239 printf("null bt_entry_buf\n");
240 mpx_dig_abort();
241 }
242 dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
243 bt_entry_buf);
244 if (!bt_entry_buf[0] &&
245 !bt_entry_buf[1] &&
246 !bt_entry_buf[2] &&
247 !bt_entry_buf[3])
248 continue;
249
250 nr_entries++;
251
252 bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
253 bt_entry_controls = sizeof(void *);
254 this_bt_entry_for_vaddr =
255 base_controlled_vaddr + bt_entry_index*bt_entry_controls;
256 /*
257 * We sign extend vaddr bits 48->63 which effectively
258 * creates a hole in the virtual address space.
259 * This calculation corrects for the hole.
260 */
261 if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
262 this_bt_entry_for_vaddr |= 0xffff800000000000;
263
264 if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
265 printf("bt_entry_buf: %p\n", bt_entry_buf);
266 printf("there is a bte for %lx but no mapping\n",
267 this_bt_entry_for_vaddr);
268 printf(" bde vaddr: %016lx\n", bde_vaddr);
269 printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
270 printf(" table_vaddr: %016lx\n", table_vaddr);
271 printf(" entry vaddr: %016lx @ offset %lx\n",
272 table_vaddr + offset_inside_bt, offset_inside_bt);
273 do_abort = 1;
274 mpx_dig_abort();
275 }
276 if (DEBUG_LEVEL < 4)
277 continue;
278
279 printf("table entry[%lx]: ", offset_inside_bt);
280		for (i = 0; i < bt_entry_size_bytes / sizeof(unsigned long); i++)
281			printf("0x%016lx ", bt_entry_buf[i]);
282 printf("\n");
283 }
284 if (do_abort)
285 mpx_dig_abort();
286 dprintf4("%s() done\n", __func__);
287 return nr_entries;
288}
289
290int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
291 int *nr_populated_bdes)
292{
293 unsigned long i;
294 int total_entries = 0;
295
296 dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
297 len_bytes, bd_offset_bytes, buf + len_bytes);
298
299 for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
300 unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
301 unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
302 unsigned long bounds_dir_entry;
303 unsigned long bd_for_vaddr;
304 unsigned long bt_start;
305 unsigned long bt_tail;
306 int nr_entries;
307
308 dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
309 bounds_dir_entry_ptr);
310
311 bounds_dir_entry = *bounds_dir_entry_ptr;
312 if (!bounds_dir_entry) {
313 dprintf4("no bounds dir at index 0x%lx / 0x%lx "
314 "start at offset:%lx %lx\n", bd_index, bd_index,
315 bd_offset_bytes, i);
316 continue;
317 }
318 dprintf3("found bounds_dir_entry: 0x%lx @ "
319 "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
320 &buf[i]);
321 /* mask off the enable bit: */
322 bounds_dir_entry &= ~0x1;
323 (*nr_populated_bdes)++;
324 dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
325 dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
326
327 bt_start = bounds_dir_entry;
328 bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
329 if (!vaddr_mapped_by_range(bt_start)) {
330 printf("bounds directory 0x%lx points to nowhere\n",
331 bounds_dir_entry);
332 mpx_dig_abort();
333 }
334 if (!vaddr_mapped_by_range(bt_tail)) {
335 printf("bounds directory end 0x%lx points to nowhere\n",
336 bt_tail);
337 mpx_dig_abort();
338 }
339 /*
340 * Each bounds directory entry controls 1MB of virtual address
341 * space. This variable is the virtual address in the process
342 * of the beginning of the area controlled by this bounds_dir.
343 */
344 bd_for_vaddr = bd_index * (1UL<<20);
345
346 nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
347 bounds_dir_global+bd_offset_bytes+i);
348 total_entries += nr_entries;
349 dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
350 "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
351 bd_index, buf+i,
352 bounds_dir_entry, nr_entries, total_entries,
353 bd_for_vaddr, bd_for_vaddr + (1UL<<20));
354 }
355 dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
356 bd_offset_bytes);
357 return total_entries;
358}
359
360int proc_pid_mem_fd = -1;
361
362void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
363 long buffer_size_bytes, void *buffer)
364{
365 unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
366 int read_ret;
367 off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
368
369 if (seek_ret != seekto)
370 mpx_dig_abort();
371
372 read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
373 /* there shouldn't practically be short reads of /proc/$pid/mem */
374 if (read_ret != buffer_size_bytes)
375 mpx_dig_abort();
376
377 return buffer;
378}
379void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
380 long buffer_size_bytes, void *buffer)
381
382{
383 unsigned char vec[buffer_size_bytes / PAGE_SIZE];
384 char *dig_bounds_dir_ptr =
385 (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
386 /*
387 * use mincore() to quickly find the areas of the bounds directory
388 * that have memory and thus will be worth scanning.
389 */
390 int incore_ret;
391
392 int incore = 0;
393 int i;
394
395 dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
396
397 incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
398 if (incore_ret) {
399 printf("mincore ret: %d\n", incore_ret);
400 perror("mincore");
401 mpx_dig_abort();
402 }
403 for (i = 0; i < sizeof(vec); i++)
404 incore += vec[i];
405 dprintf4("%s() total incore: %d\n", __func__, incore);
406 if (!incore)
407 return NULL;
408 dprintf3("%s() total incore: %d\n", __func__, incore);
409 return dig_bounds_dir_ptr;
410}
411
412int inspect_pid(int pid)
413{
414 static int dig_nr;
415 long offset_inside_bounds_dir;
416 char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
417 char *dig_bounds_dir_ptr;
418 int total_entries = 0;
419 int nr_populated_bdes = 0;
420 int inspect_self;
421
422 if (getpid() == pid) {
423 dprintf4("inspecting self\n");
424 inspect_self = 1;
425 } else {
426 dprintf4("inspecting pid %d\n", pid);
427 mpx_dig_abort();
428 }
429
430 for (offset_inside_bounds_dir = 0;
431 offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
432 offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
433 static int bufs_skipped;
434 int this_entries;
435
436 if (inspect_self) {
437 dig_bounds_dir_ptr =
438 fill_bounds_dir_buf_self(offset_inside_bounds_dir,
439 sizeof(bounds_dir_buf),
440 &bounds_dir_buf[0]);
441 } else {
442 dig_bounds_dir_ptr =
443 fill_bounds_dir_buf_other(offset_inside_bounds_dir,
444 sizeof(bounds_dir_buf),
445 &bounds_dir_buf[0]);
446 }
447 if (!dig_bounds_dir_ptr) {
448 bufs_skipped++;
449 continue;
450 }
451 this_entries = search_bd_buf(dig_bounds_dir_ptr,
452 sizeof(bounds_dir_buf),
453 offset_inside_bounds_dir,
454 &nr_populated_bdes);
455 total_entries += this_entries;
456 }
457 printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
458 total_entries, nr_populated_bdes);
459 return total_entries + nr_populated_bdes;
460}
461
462#ifdef MPX_DIG_REMOTE
463int main(int argc, char **argv)
464{
465 int err;
466 char *c;
467 unsigned long bounds_dir_entry;
468 int pid;
469
470 printf("mpx-dig starting...\n");
471 err = sscanf(argv[1], "%d", &pid);
472 printf("parsing: '%s', err: %d\n", argv[1], err);
473 if (err != 1)
474 mpx_dig_abort();
475
476 err = sscanf(argv[2], "%lx", &bounds_dir_global);
477 printf("parsing: '%s': %d\n", argv[2], err);
478 if (err != 1)
479 mpx_dig_abort();
480
481 proc_pid_mem_fd = open_proc(pid, "mem");
482 if (proc_pid_mem_fd < 0)
483 mpx_dig_abort();
484
485 inspect_pid(pid);
486 return 0;
487}
488#endif
489
490long inspect_me(struct mpx_bounds_dir *bounds_dir)
491{
492 int pid = getpid();
493
494 pid_load_vaddrs(pid);
495 bounds_dir_global = (unsigned long)bounds_dir;
496 dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
497 return inspect_pid(pid);
498}
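To make the directory arithmetic in search_bd_buf()/dump_table() concrete, a worked sketch (the entry index is made up): each 64-bit bounds-directory entry covers 1MB of virtual address space, and any covered address computed above the canonical boundary gets its high bits restored.

	unsigned long bd_index     = 0x7f1234;		/* hypothetical BDE index */
	unsigned long bd_for_vaddr = bd_index * (1UL << 20);	/* 1MB per entry */

	/* same fixup dump_table() applies for the canonical hole: */
	if (bd_for_vaddr > 0x00007fffffffffffUL)
		bd_for_vaddr |= 0xffff800000000000UL;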
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
new file mode 100644
index 000000000000..093c190178a9
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -0,0 +1,123 @@
1#ifndef _MPX_HW_H
2#define _MPX_HW_H
3
4#include <assert.h>
5
6/* Describe the MPX Hardware Layout in here */
7
8#define NR_MPX_BOUNDS_REGISTERS 4
9
10#ifdef __i386__
11
12#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
13#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
14#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
15#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
16
17#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
18#define MPX_BOUNDS_TABLE_TOP_BIT 11
19#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
20#define MPX_BOUNDS_DIR_TOP_BIT 31
21
22#else
23
24/*
25 * Linear Address of "pointer" (LAp)
26 * 0 -> 2: ignored
27 * 3 -> 19: index in to bounds table
28 * 20 -> 47: index in to bounds directory
29 * 48 -> 63: ignored
30 */
31
32#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
33#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
34#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
35#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
36
37#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
38#define MPX_BOUNDS_TABLE_TOP_BIT 19
39#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
40#define MPX_BOUNDS_DIR_TOP_BIT 47
41
42#endif
43
44#define MPX_BOUNDS_DIR_NR_ENTRIES \
45 (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
46#define MPX_BOUNDS_TABLE_NR_ENTRIES \
47 (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
48
49#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
50
51struct mpx_bd_entry {
52 union {
53 char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
54 void *contents[1];
55 };
56} __attribute__((packed));
57
58struct mpx_bt_entry {
59 union {
60 char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
61 unsigned long contents[1];
62 };
63} __attribute__((packed));
64
65struct mpx_bounds_dir {
66 struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
67} __attribute__((packed));
68
69struct mpx_bounds_table {
70 struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
71} __attribute__((packed));
72
73static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
74{
75	int total_nr_bits = topbit - bottombit + 1;	/* topbit is inclusive */
76 unsigned long mask = (1UL << total_nr_bits)-1;
77 return (val >> bottombit) & mask;
78}
79
80static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
81{
82 return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
83 MPX_BOUNDS_TABLE_TOP_BIT);
84}
85
86static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
87{
88 return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
89 MPX_BOUNDS_DIR_TOP_BIT);
90}
91
92static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
93 struct mpx_bounds_dir *bounds_dir)
94{
95 unsigned long index = __vaddr_bounds_directory_index(vaddr);
96 return &bounds_dir->entries[index];
97}
98
99static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
100{
101 unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
102 return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
103}
104
105static inline struct mpx_bounds_table *
106__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
107{
108 unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
109 assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
110 __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
111 return (struct mpx_bounds_table *)__bd_entry;
112}
113
114static inline struct mpx_bt_entry *
115mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
116{
117 struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
118 struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
119 unsigned long index = __vaddr_bounds_table_index(vaddr);
120 return &bt->entries[index];
121}
122
123#endif /* _MPX_HW_H */
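A worked example of the index extraction above for 64-bit (the pointer value is illustrative): bits 3-19 of the linear address select the bounds-table entry and bits 20-47 select the bounds-directory entry.

	void *p = (void *)0x00007f8010203048UL;
	unsigned long bt_idx = __vaddr_bounds_table_index(p);		/* LAp[19:3] */
	unsigned long bd_idx = __vaddr_bounds_directory_index(p);	/* LAp[47:20] */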
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
new file mode 100644
index 000000000000..616ee9673339
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -0,0 +1,1585 @@
1/*
2 * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtensions)
3 *
4 * Written by:
5 * "Ren, Qiaowei" <qiaowei.ren@intel.com>
6 * "Wei, Gang" <gang.wei@intel.com>
7 * "Hansen, Dave" <dave.hansen@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2.
12 */
13
14/*
15 * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
16 * it works on 32-bit.
17 */
18
19int inspect_every_this_many_mallocs = 100;
20int zap_all_every_this_many_mallocs = 1000;
21
22#define _GNU_SOURCE
23#define _LARGEFILE64_SOURCE
24
25#include <string.h>
26#include <stdio.h>
27#include <stdint.h>
28#include <stdbool.h>
29#include <signal.h>
30#include <assert.h>
31#include <stdlib.h>
32#include <ucontext.h>
33#include <sys/mman.h>
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <fcntl.h>
37#include <unistd.h>
38
39#include "mpx-hw.h"
40#include "mpx-debug.h"
41#include "mpx-mm.h"
42
43#ifndef __always_inline
44#define __always_inline inline __attribute__((always_inline))
45#endif
46
47#ifndef TEST_DURATION_SECS
48#define TEST_DURATION_SECS 3
49#endif
50
51void write_int_to(char *prefix, char *file, int int_to_write)
52{
53 char buf[100];
54 int fd = open(file, O_RDWR);
55 int len;
56 int ret;
57
58 assert(fd >= 0);
59 len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
60 assert(len >= 0);
61 assert(len < sizeof(buf));
62 ret = write(fd, buf, len);
63 assert(ret == len);
64 ret = close(fd);
65 assert(!ret);
66}
67
68void write_pid_to(char *prefix, char *file)
69{
70 write_int_to(prefix, file, getpid());
71}
72
73void trace_me(void)
74{
75/* tracing events dir */
76#define TED "/sys/kernel/debug/tracing/events/"
77/*
78 write_pid_to("common_pid=", TED "signal/filter");
79 write_pid_to("common_pid=", TED "exceptions/filter");
80 write_int_to("", TED "signal/enable", 1);
81 write_int_to("", TED "exceptions/enable", 1);
82*/
83 write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
84 write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
85}
86
87#define test_failed() __test_failed(__FILE__, __LINE__)
88static void __test_failed(char *f, int l)
89{
90 fprintf(stderr, "abort @ %s::%d\n", f, l);
91 abort();
92}
93
94/* Error Printf */
95#define eprintf(args...) fprintf(stderr, args)
96
97#ifdef __i386__
98
99/* i386 directory size is 4MB */
100#define REG_IP_IDX REG_EIP
101#define REX_PREFIX
102
103#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
104
105/*
106 * __cpuid() is from the Linux Kernel:
107 */
108static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
109 unsigned int *ecx, unsigned int *edx)
110{
111 /* ecx is often an input as well as an output. */
112 asm volatile(
113 "push %%ebx;"
114 "cpuid;"
115 "mov %%ebx, %1;"
116 "pop %%ebx"
117 : "=a" (*eax),
118 "=g" (*ebx),
119 "=c" (*ecx),
120 "=d" (*edx)
121 : "0" (*eax), "2" (*ecx));
122}
123
124#else /* __i386__ */
125
126#define REG_IP_IDX REG_RIP
127#define REX_PREFIX "0x48, "
128
129#define XSAVE_OFFSET_IN_FPMEM 0
130
131/*
132 * __cpuid() is from the Linux Kernel:
133 */
134static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
135 unsigned int *ecx, unsigned int *edx)
136{
137 /* ecx is often an input as well as an output. */
138 asm volatile(
139 "cpuid;"
140 : "=a" (*eax),
141 "=b" (*ebx),
142 "=c" (*ecx),
143 "=d" (*edx)
144 : "0" (*eax), "2" (*ecx));
145}
146
147#endif /* !__i386__ */
148
149struct xsave_hdr_struct {
150 uint64_t xstate_bv;
151 uint64_t reserved1[2];
152 uint64_t reserved2[5];
153} __attribute__((packed));
154
155struct bndregs_struct {
156 uint64_t bndregs[8];
157} __attribute__((packed));
158
159struct bndcsr_struct {
160 uint64_t cfg_reg_u;
161 uint64_t status_reg;
162} __attribute__((packed));
163
164struct xsave_struct {
165 uint8_t fpu_sse[512];
166 struct xsave_hdr_struct xsave_hdr;
167 uint8_t ymm[256];
168 uint8_t lwp[128];
169 struct bndregs_struct bndregs;
170 struct bndcsr_struct bndcsr;
171} __attribute__((packed));
172
173uint8_t __attribute__((__aligned__(64))) buffer[4096];
174struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
175
176uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
177struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
178
179uint64_t num_bnd_chk;
180
181static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
182{
183 uint32_t lmask = mask;
184 uint32_t hmask = mask >> 32;
185
186 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
187 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
188 : "memory");
189}
190
191static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
192{
193 uint32_t lmask = mask;
194 uint32_t hmask = mask >> 32;
195 unsigned char *fx = _fx;
196
197 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
198 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
199 : "memory");
200}
201
202static inline uint64_t xgetbv(uint32_t index)
203{
204 uint32_t eax, edx;
205
206 asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
207 : "=a" (eax), "=d" (edx)
208 : "c" (index));
209 return eax + ((uint64_t)edx << 32);
210}
211
212static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
213{
214 memset(buffer, 0, sizeof(buffer));
215 memcpy(buffer,
216 (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
217 sizeof(struct xsave_struct));
218
219 return xsave_buf->bndcsr.status_reg;
220}
221
222#include <pthread.h>
223
224static uint8_t *get_next_inst_ip(uint8_t *addr)
225{
226 uint8_t *ip = addr;
227 uint8_t sib;
228 uint8_t rm;
229 uint8_t mod;
230 uint8_t base;
231 uint8_t modrm;
232
233 /* determine the prefix. */
234 switch(*ip) {
235 case 0xf2:
236 case 0xf3:
237 case 0x66:
238 ip++;
239 break;
240 }
241
242 /* look for rex prefix */
243 if ((*ip & 0x40) == 0x40)
244 ip++;
245
246 /* Make sure we have a MPX instruction. */
247 if (*ip++ != 0x0f)
248 return addr;
249
250 /* Skip the op code byte. */
251 ip++;
252
253 /* Get the modrm byte. */
254 modrm = *ip++;
255
256 /* Break it down into parts. */
257 rm = modrm & 7;
258 mod = (modrm >> 6);
259
260 /* Init the parts of the address mode. */
261 base = 8;
262
263 /* Is it a mem mode? */
264 if (mod != 3) {
265 /* look for scaled indexed addressing */
266 if (rm == 4) {
267 /* SIB addressing */
268 sib = *ip++;
269 base = sib & 7;
270 switch (mod) {
271 case 0:
272 if (base == 5)
273 ip += 4;
274 break;
275
276 case 1:
277 ip++;
278 break;
279
280 case 2:
281 ip += 4;
282 break;
283 }
284
285 } else {
286 /* MODRM addressing */
287 switch (mod) {
288 case 0:
289 /* DISP32 addressing, no base */
290 if (rm == 5)
291 ip += 4;
292 break;
293
294 case 1:
295 ip++;
296 break;
297
298 case 2:
299 ip += 4;
300 break;
301 }
302 }
303 }
304 return ip;
305}
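A worked decode with the logic above, using the BNDCL encoding listed in the opcode table later in this file (f3 0f 1a 01, bndcl (%rcx),%bnd0):

	/*
	 * f3  -> recognized legacy prefix, ip++
	 * 0f  -> not a REX byte (0x0f & 0x40 == 0); two-byte escape, ip++
	 * 1a  -> opcode byte, skipped with ip++
	 * 01  -> modrm: mod=0, rm=1 -> mem mode, no SIB, no displacement
	 *
	 * The instruction is 4 bytes, so the #BR handler resumes at addr + 4.
	 */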
306
307#ifdef si_lower
308static inline void *__si_bounds_lower(siginfo_t *si)
309{
310 return si->si_lower;
311}
312
313static inline void *__si_bounds_upper(siginfo_t *si)
314{
315 return si->si_upper;
316}
317#else
318static inline void **__si_bounds_hack(siginfo_t *si)
319{
320 void *sigfault = &si->_sifields._sigfault;
321 void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
322 void **__si_lower = end_sigfault;
323
324 return __si_lower;
325}
326
327static inline void *__si_bounds_lower(siginfo_t *si)
328{
329 return *__si_bounds_hack(si);
330}
331
332static inline void *__si_bounds_upper(siginfo_t *si)
333{
334 return (*__si_bounds_hack(si)) + sizeof(void *);
335}
336#endif
337
338static int br_count;
339static int expected_bnd_index = -1;
340uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
341unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
342
343/*
344 * The kernel is supposed to provide some information about the bounds
345 * exception in the siginfo. It should match what we have in the bounds
346 * registers that we are checking against. Just check against the shadow copy
347 * since it is easily available, and we also check that *it* matches the real
348 * registers.
349 */
350void check_siginfo_vs_shadow(siginfo_t* si)
351{
352 int siginfo_ok = 1;
353 void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
354 void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
355
356 if ((expected_bnd_index < 0) ||
357 (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
358 fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
359 expected_bnd_index);
360 exit(6);
361 }
362 if (__si_bounds_lower(si) != shadow_lower)
363 siginfo_ok = 0;
364 if (__si_bounds_upper(si) != shadow_upper)
365 siginfo_ok = 0;
366
367 if (!siginfo_ok) {
368 fprintf(stderr, "ERROR: siginfo bounds do not match "
369 "shadow bounds for register %d\n", expected_bnd_index);
370 exit(7);
371 }
372}
373
374void handler(int signum, siginfo_t *si, void *vucontext)
375{
376 int i;
377 ucontext_t *uctxt = vucontext;
378 int trapno;
379 unsigned long ip;
380
381 dprintf1("entered signal handler\n");
382
383 trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
384 ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
385
386 if (trapno == 5) {
387 typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
388 uint64_t status = read_mpx_status_sig(uctxt);
389 uint64_t br_reason = status & 0x3;
390
391 br_count++;
392 dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
393
394#define __SI_FAULT (3 << 16)
395#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */
396
397 dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
398 status, ip, br_reason);
399 dprintf2("si_signo: %d\n", si->si_signo);
400 dprintf2(" signum: %d\n", signum);
401 dprintf2("info->si_code == SEGV_BNDERR: %d\n",
402 (si->si_code == SEGV_BNDERR));
403 dprintf2("info->si_code: %d\n", si->si_code);
404 dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
405 dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
406
407 check_siginfo_vs_shadow(si);
408
409 for (i = 0; i < 8; i++)
410 dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
411 switch (br_reason) {
412 case 0: /* traditional BR */
413 fprintf(stderr,
414 "Undefined status with bound exception:%jx\n",
415 status);
416 exit(5);
417 case 1: /* #BR MPX bounds exception */
418 /* these are normal and we expect to see them */
419 dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
420 status, (void *)ip, si->si_addr);
421 num_bnd_chk++;
422 uctxt->uc_mcontext.gregs[REG_IP_IDX] =
423 (greg_t)get_next_inst_ip((uint8_t *)ip);
424 break;
425 case 2:
426			fprintf(stderr, "#BR status == 2, missing bounds table, "
427				"kernel should have handled!!\n");
428 exit(4);
429 break;
430 default:
431 fprintf(stderr, "bound check error: status 0x%jx at %p\n",
432 status, (void *)ip);
433 num_bnd_chk++;
434 uctxt->uc_mcontext.gregs[REG_IP_IDX] =
435 (greg_t)get_next_inst_ip((uint8_t *)ip);
436 fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
437 exit(3);
438 }
439 } else if (trapno == 14) {
440 eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
441 trapno, ip);
442 eprintf("si_addr %p\n", si->si_addr);
443 eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
444 test_failed();
445 } else {
446 eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
447 eprintf("si_addr %p\n", si->si_addr);
448 eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
449 test_failed();
450 }
451}
452
453static inline void cpuid_count(unsigned int op, int count,
454 unsigned int *eax, unsigned int *ebx,
455 unsigned int *ecx, unsigned int *edx)
456{
457 *eax = op;
458 *ecx = count;
459 __cpuid(eax, ebx, ecx, edx);
460}
461
462#define XSTATE_CPUID 0x0000000d
463
464/*
465 * List of XSAVE features Linux knows about:
466 */
467enum xfeature_bit {
468 XSTATE_BIT_FP,
469 XSTATE_BIT_SSE,
470 XSTATE_BIT_YMM,
471 XSTATE_BIT_BNDREGS,
472 XSTATE_BIT_BNDCSR,
473 XSTATE_BIT_OPMASK,
474 XSTATE_BIT_ZMM_Hi256,
475 XSTATE_BIT_Hi16_ZMM,
476
477 XFEATURES_NR_MAX,
478};
479
480#define XSTATE_FP (1 << XSTATE_BIT_FP)
481#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
482#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
483#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
484#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
485#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
486#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
487#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
488
489#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
490
491bool one_bit(unsigned int x, int bit)
492{
493 return !!(x & (1<<bit));
494}
495
496void print_state_component(int state_bit_nr, char *name)
497{
498 unsigned int eax, ebx, ecx, edx;
499 unsigned int state_component_size;
500 unsigned int state_component_supervisor;
501 unsigned int state_component_user;
502 unsigned int state_component_aligned;
503
504 /* See SDM Section 13.2 */
505 cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
506 assert(eax || ebx || ecx);
507 state_component_size = eax;
508 state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
509 state_component_user = !one_bit(ecx, 0);
510 state_component_aligned = one_bit(ecx, 1);
511 printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
512 name,
513 state_component_size, state_component_user,
514 state_component_supervisor, state_component_aligned);
515
516}
517
518/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
519#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
520#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
521
522bool check_mpx_support(void)
523{
524 unsigned int eax, ebx, ecx, edx;
525
526 cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
527
528 /* We can't do much without XSAVE, so just make these assert()'s */
529 if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
530 fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
531 exit(0);
532 }
533
534 if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
535 fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
536 exit(0);
537 }
538
539 /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
540 /* Is this redundant with the feature bit checks? */
541 cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
542 if (eax < XSTATE_CPUID) {
543 fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
544 " can not run MPX tests\n");
545 exit(0);
546 }
547
548 printf("XSAVE is supported by HW & OS\n");
549
550 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
551
552 printf("XSAVE processor supported state mask: 0x%x\n", eax);
553 printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
554
555	/* Make sure the processor supports the MPX states */
556 if ((eax & MPX_XSTATES) != MPX_XSTATES) {
557 fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
558 exit(0);
559 }
560
561	/* Make sure the MPX states are enabled in XCR0 */
562	if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
563		fprintf(stderr, "MPX XSTATE(s) not enabled in XCR0, "
564 "can not run MPX tests\n");
565 exit(0);
566 }
567
568 print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
569 print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");
570
571 return true;
572}
573
574void enable_mpx(void *l1base)
575{
576 /* enable point lookup */
577 memset(buffer, 0, sizeof(buffer));
578 xrstor_state(xsave_buf, 0x18);
579
580 xsave_buf->xsave_hdr.xstate_bv = 0x10;
581 xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
582 xsave_buf->bndcsr.status_reg = 0;
583
584 dprintf2("bf xrstor\n");
585	dprintf2("xsave bndcsr: status %jx, configu %jx\n",
586 xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
587 xrstor_state(xsave_buf, 0x18);
588 dprintf2("after xrstor\n");
589
590 xsave_state_1(xsave_buf, 0x18);
591
592 dprintf1("xsave bndcsr: status %jx, configu %jx\n",
593 xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
594}
595
596#include <sys/prctl.h>
597
598struct mpx_bounds_dir *bounds_dir_ptr;
599
600unsigned long __bd_incore(const char *func, int line)
601{
602 unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
603 return ret;
604}
605#define bd_incore() __bd_incore(__func__, __LINE__)
606
607void check_clear(void *ptr, unsigned long sz)
608{
609 unsigned long *i;
610
611 for (i = ptr; (void *)i < ptr + sz; i++) {
612 if (*i) {
613 dprintf1("%p is NOT clear at %p\n", ptr, i);
614 assert(0);
615 }
616 }
617 dprintf1("%p is clear for %lx\n", ptr, sz);
618}
619
620void check_clear_bd(void)
621{
622 check_clear(bounds_dir_ptr, 2UL << 30);
623}
624
625#define USE_MALLOC_FOR_BOUNDS_DIR 1
626bool process_specific_init(void)
627{
628 unsigned long size;
629 unsigned long *dir;
630 /* Guarantee we have the space to align it, add padding: */
631 unsigned long pad = getpagesize();
632
633 size = 2UL << 30; /* 2GB */
634 if (sizeof(unsigned long) == 4)
635 size = 4UL << 20; /* 4MB */
636 dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
637
638 if (USE_MALLOC_FOR_BOUNDS_DIR) {
639 unsigned long _dir;
640
641 dir = malloc(size + pad);
642 assert(dir);
643 _dir = (unsigned long)dir;
644 _dir += 0xfffUL;
645 _dir &= ~0xfffUL;
646 dir = (void *)_dir;
647 } else {
648 /*
649 * This makes debugging easier because the address
650 * calculations are simpler:
651 */
652 dir = mmap((void *)0x200000000000, size + pad,
653 PROT_READ|PROT_WRITE,
654 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
655 if (dir == (void *)-1) {
656 perror("unable to allocate bounds directory");
657 abort();
658 }
659 check_clear(dir, size);
660 }
661 bounds_dir_ptr = (void *)dir;
662 madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
663 bd_incore();
664 dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
665 (char *)bounds_dir_ptr + size);
666 check_clear(dir, size);
667 enable_mpx(dir);
668 check_clear(dir, size);
669	if (prctl(43, 0, 0, 0, 0)) {	/* 43 == PR_MPX_ENABLE_MANAGEMENT */
670 printf("no MPX support\n");
671 abort();
672 return false;
673 }
674 return true;
675}
676
677bool process_specific_finish(void)
678{
679	if (prctl(44)) {	/* 44 == PR_MPX_DISABLE_MANAGEMENT */
680 printf("no MPX support\n");
681 return false;
682 }
683 return true;
684}
685
686void setup_handler()
687{
688 int r, rs;
689 struct sigaction newact;
690 struct sigaction oldact;
691
692 /* #BR is mapped to sigsegv */
693 int signum = SIGSEGV;
694
695 newact.sa_handler = 0; /* void(*)(int)*/
696 newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
697
698 /*sigset_t - signals to block while in the handler */
699 /* get the old signal mask. */
700 rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
701 assert(rs == 0);
702
703 /* call sa_sigaction, not sa_handler*/
704 newact.sa_flags = SA_SIGINFO;
705
706 newact.sa_restorer = 0; /* void(*)(), obsolete */
707 r = sigaction(signum, &newact, &oldact);
708 assert(r == 0);
709}
710
711void mpx_prepare(void)
712{
713 dprintf2("%s()\n", __func__);
714 setup_handler();
715 process_specific_init();
716}
717
718void mpx_cleanup(void)
719{
720 printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
721 process_specific_finish();
722}
723
724/*-------------- the following is the test case ---------------*/
725#include <stdint.h>
726#include <stdbool.h>
727#include <stdlib.h>
728#include <stdio.h>
729#include <time.h>
730
731uint64_t num_lower_brs;
732uint64_t num_upper_brs;
733
734#define MPX_CONFIG_OFFSET 1024
735#define MPX_BOUNDS_OFFSET 960
736#define MPX_HEADER_OFFSET 512
737#define MAX_ADDR_TESTED (1<<28)
738#define TEST_ROUNDS 100
739
740/*
741 0F 1A /r BNDLDX-Load
742 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
743 66 0F 1A /r BNDMOV bnd1, bnd2/m128
744 66 0F 1B /r BNDMOV bnd1/m128, bnd2
745 F2 0F 1A /r BNDCU bnd, r/m64
746 F2 0F 1B /r BNDCN bnd, r/m64
747 F3 0F 1A /r BNDCL bnd, r/m64
748 F3 0F 1B /r BNDMK bnd, m64
749*/
750
751static __always_inline void xsave_state(void *_fx, uint64_t mask)
752{
753 uint32_t lmask = mask;
754 uint32_t hmask = mask >> 32;
755 unsigned char *fx = _fx;
756
757 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
758 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
759 : "memory");
760}
761
762static __always_inline void mpx_clear_bnd0(void)
763{
764 long size = 0;
765 void *ptr = NULL;
766 /* F3 0F 1B /r BNDMK bnd, m64 */
767 /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
768 asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
769 : : "c" (ptr), "d" (size-1)
770 : "memory");
771}
772
773static __always_inline void mpx_make_bound_helper(unsigned long ptr,
774 unsigned long size)
775{
776 /* F3 0F 1B /r BNDMK bnd, m64 */
777 /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
778 asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
779 : : "c" (ptr), "d" (size-1)
780 : "memory");
781}
782
783static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
784{
785	/* F3 0F 1A /r BNDCL bnd, r/m64 */
786 /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
787 asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
788 : : "c" (ptr)
789 : "memory");
790}
791
792static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
793{
794 /* F2 0F 1A /r BNDCU bnd, r/m64 */
795 /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
796 asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
797 : : "c" (ptr)
798 : "memory");
799}
800
801static __always_inline void mpx_movbndreg_helper()
802{
803 /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
804 /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
805
806 asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
807}
808
809static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
810{
811 /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
812 /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
813 asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
814 : : "c" (mem)
815 : "memory");
816}
817
818static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
819{
820 /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
821 /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
822 asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
823 : : "c" (mem)
824 : "memory");
825}
826
827static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
828 unsigned long ptr_val)
829{
830 /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
831 /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
832 asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
833 : : "c" (ptr_addr), "d" (ptr_val)
834 : "memory");
835}
836
837static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
838 unsigned long ptr_val)
839{
840 /* 0F 1A /r BNDLDX-Load */
841	/* 0f 1a 04 11	bndldx (%rcx,%rdx,1),%bnd0 */
842 asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
843 : : "c" (ptr_addr), "d" (ptr_val)
844 : "memory");
845}
846
847void __print_context(void *__print_xsave_buffer, int line)
848{
849 uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
850 uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
851
852 int i;
853 eprintf("%s()::%d\n", "print_context", line);
854 for (i = 0; i < 4; i++) {
855 eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
856 (unsigned long)bounds[i*2],
857 ~(unsigned long)bounds[i*2+1],
858 (unsigned long)bounds[i*2+1]);
859 }
860
861 eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
862}
863#define print_context(x) __print_context(x, __LINE__)
864#ifdef DEBUG
865#define dprint_context(x) print_context(x)
866#else
867#define dprint_context(x) do{}while(0)
868#endif
869
870void init()
871{
872 int i;
873
874 srand((unsigned int)time(NULL));
875
876 for (i = 0; i < 4; i++) {
877 shadow_plb[i][0] = 0;
878 shadow_plb[i][1] = ~(unsigned long)0;
879 }
880}
881
882long int __mpx_random(int line)
883{
884#ifdef NOT_SO_RANDOM
885 static long fake = 722122311;
886 fake += 563792075;
887	return fake;
888#else
889 return random();
890#endif
891}
892#define mpx_random() __mpx_random(__LINE__)
893
894uint8_t *get_random_addr()
895{
896	uint8_t *addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
897 return (addr - (unsigned long)addr % sizeof(uint8_t *));
898}
899
900static inline bool compare_context(void *__xsave_buffer)
901{
902 uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
903
904 int i;
905 for (i = 0; i < 4; i++) {
906 dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
907 i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
908 i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
909 if ((shadow_plb[i][0] != bounds[i*2]) ||
910 (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
911 eprintf("ERROR comparing shadow to real bound register %d\n", i);
912 eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
913 (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
914 (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
915 return false;
916 }
917 }
918
919 return true;
920}
921
922void mkbnd_shadow(uint8_t *ptr, int index, long offset)
923{
924 uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
925 uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
926 *lower = (unsigned long)ptr;
927 *upper = (unsigned long)ptr + offset - 1;
928}
929
930void check_lowerbound_shadow(uint8_t *ptr, int index)
931{
932 uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
933 if (*lower > (uint64_t)(unsigned long)ptr)
934 num_lower_brs++;
935 else
936 dprintf1("LowerBoundChk passed:%p\n", ptr);
937}
938
939void check_upperbound_shadow(uint8_t *ptr, int index)
940{
941 uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
942 if (upper < (uint64_t)(unsigned long)ptr)
943 num_upper_brs++;
944 else
945 dprintf1("UpperBoundChk passed:%p\n", ptr);
946}
947
948__always_inline void movbndreg_shadow(int src, int dest)
949{
950 shadow_plb[dest][0] = shadow_plb[src][0];
951 shadow_plb[dest][1] = shadow_plb[src][1];
952}
953
954__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
955{
956 unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
957 unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
958 *dest = *lower;
959 *(dest+1) = *upper;
960}
961
962__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
963{
964 unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
965 unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
966 *lower = *src;
967 *upper = *(src+1);
968}
969
970__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
971{
972 shadow_map[0] = (unsigned long)shadow_plb[index][0];
973 shadow_map[1] = (unsigned long)shadow_plb[index][1];
974 shadow_map[2] = (unsigned long)ptr_val;
975 dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
976 index, ptr, ptr_val, ptr_val);
977 /*ptr ignored */
978}
979
980void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
981{
982 uint64_t lower = shadow_map[0];
983 uint64_t upper = shadow_map[1];
984 uint8_t *value = (uint8_t *)shadow_map[2];
985
986 if (value != ptr_val) {
987 dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
988 "because %p != %p\n", __func__, index, ptr,
989 ptr_val, index, value, ptr_val);
990 shadow_plb[index][0] = 0;
991 shadow_plb[index][1] = ~(unsigned long)0;
992 } else {
993 shadow_plb[index][0] = lower;
994 shadow_plb[index][1] = upper;
995 }
996 /* ptr ignored */
997}
998
999static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
1000{
1001 mpx_make_bound_helper((unsigned long)ptr, 0x1800);
1002}
1003
1004static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
1005{
1006 mkbnd_shadow(ptr, 0, 0x1800);
1007}
1008
1009static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
1010{
1011 /* these are hard-coded to check bnd0 */
1012 expected_bnd_index = 0;
1013 mpx_check_lowerbound_helper((unsigned long)(ptr-1));
1014 mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
1015 /* reset this since we do not expect any more bounds exceptions */
1016 expected_bnd_index = -1;
1017}
1018
1019static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
1020{
1021 check_lowerbound_shadow(ptr-1, 0);
1022 check_upperbound_shadow(ptr+0x1800, 0);
1023}
1024
1025static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
1026{
1027 mpx_make_bound_helper((unsigned long)ptr, 0x1800);
1028 mpx_movbndreg_helper();
1029 mpx_movbnd2mem_helper(buf);
1030 mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
1031}
1032
1033static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
1034{
1035 mkbnd_shadow(ptr, 0, 0x1800);
1036 movbndreg_shadow(0, 2);
1037 movbnd2mem_shadow(0, (unsigned long *)buf);
1038 mkbnd_shadow(ptr+0x12, 0, 0x1800);
1039}
1040
1041static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
1042{
1043 mpx_movbnd_from_mem_helper(buf);
1044}
1045
1046static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
1047{
1048 movbnd_from_mem_shadow((unsigned long *)buf, 0);
1049}
1050
1051static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
1052{
1053 mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
1054 mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
1055}
1056
1057static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
1058{
1059 stdsc_shadow(0, buf, ptr);
1060 mkbnd_shadow(ptr+0x12, 0, 0x1800);
1061}
1062
1063static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
1064{
1065 mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
1066}
1067
1068static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
1069{
1070 lddsc_shadow(0, buf, ptr);
1071}
1072
1073#define NR_MPX_TEST_FUNCTIONS 6
1074
1075/*
1076 * For compatibility reasons, MPX will clear the bounds registers
1077 * when you make function calls (among other things). We have to
1078 * preserve the registers in between calls to the "helpers" since
1079 * they build on each other.
1080 *
1081 * Be very careful not to make any function calls inside the
1082 * helpers, or anywhere else between the xrstor and xsave.
1083 */
1084#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
1085 xrstor_state(xsave_test_buf, flags); \
1086 mpx_test_helper##helper_nr(buf, ptr); \
1087 xsave_state(xsave_test_buf, flags); \
1088 mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
1089} while (0)
1090
1091static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
1092{
1093 uint64_t flags = 0x18;
1094
1095 dprint_context(xsave_test_buf);
1096 switch (nr) {
1097 case 0:
1098 run_helper(0, buf, buf_shadow, ptr);
1099 break;
1100 case 1:
1101 run_helper(1, buf, buf_shadow, ptr);
1102 break;
1103 case 2:
1104 run_helper(2, buf, buf_shadow, ptr);
1105 break;
1106 case 3:
1107 run_helper(3, buf, buf_shadow, ptr);
1108 break;
1109 case 4:
1110 run_helper(4, buf, buf_shadow, ptr);
1111 break;
1112 case 5:
1113 run_helper(5, buf, buf_shadow, ptr);
1114 break;
1115 default:
1116 test_failed();
1117 break;
1118 }
1119 dprint_context(xsave_test_buf);
1120}
1121
1122unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
1123extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
1124
1125long cover_buf_with_bt_entries(void *buf, long buf_len)
1126{
1127 int i;
1128 long nr_to_fill;
1129 int ratio = 1000;
1130 unsigned long buf_len_in_ptrs;
1131
1132	/* Fill about 1/1000 of the space with bt entries */
1133 nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
1134
1135 if (!nr_to_fill)
1136 dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
1137
1138 /* Align the buffer to pointer size */
1139 while (((unsigned long)buf) % sizeof(void *)) {
1140 buf++;
1141 buf_len--;
1142 }
1143	/* We are storing pointers, so measure the buffer in pointers */
1144 buf_len_in_ptrs = buf_len / sizeof(void *);
1145
1146 for (i = 0; i < nr_to_fill; i++) {
1147 long index = (mpx_random() % buf_len_in_ptrs);
1148 void *ptr = buf + index * sizeof(unsigned long);
1149 unsigned long ptr_addr = (unsigned long)ptr;
1150
1151 /* ptr and size can be anything */
1152 mpx_make_bound_helper((unsigned long)ptr, 8);
1153
1154 /*
1155 * Take bnd0 and put it into the bounds tables. "buf + index" is an
1156 * address inside the buffer where we pretend that we are going
1157 * to store a pointer. We never actually do, because we will
1158 * never load entries back from the table, so it doesn't matter.
1159 */
1160 mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
1161 dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
1162 ptr_addr, buf);
1163 }
1164 return nr_to_fill;
1165}
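
For a sense of the fill density: on 64-bit, with sizeof(unsigned long) == 8 and ratio == 1000, a 1 MB buffer gets 1048576 / (8 * 1000) = 131 bounds-table entries, scattered at random pointer-aligned offsets.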
1166
1167unsigned long align_down(unsigned long alignme, unsigned long align_to)
1168{
1169 return alignme & ~(align_to-1);
1170}
1171
1172unsigned long align_up(unsigned long alignme, unsigned long align_to)
1173{
1174 return (alignme + align_to - 1) & ~(align_to-1);
1175}
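
Both helpers rely on the usual power-of-two mask trick, so align_to must be a power of two. Worked examples:

	align_down(0x12345, 0x1000) == 0x12000
	align_up(0x12345, 0x1000)   == 0x13000
	align_up(0x12000, 0x1000)   == 0x12000	/* already aligned: unchanged */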
1176
1177/*
1178 * Using 1MB alignment guarantees that no allocation
1179 * will overlap with another's bounds tables.
1180 *
1181 * We have to cook our own allocator here. malloc() can
1182 * mix other allocations with ours, which means that even
1183 * if we free all of our allocations, there might still
1184 * be bounds tables for the *areas* since there is other
1185 * valid memory there.
1186 *
1187 * We also can't use malloc() because a free() of an area
1188 * might not free it back to the kernel. We want it
1189 * completely unmapped, and malloc() does not guarantee
1190 * that.
1191 */
1192#ifdef __i386__
1193long alignment = 4096;
1194long sz_alignment = 4096;
1195#else
1196long alignment = 1 * MB;
1197long sz_alignment = 1 * MB;
1198#endif
1199void *mpx_mini_alloc(unsigned long sz)
1200{
1201 unsigned long long tries = 0;
1202 static void *last;
1203 void *ptr;
1204 void *try_at;
1205
1206 sz = align_up(sz, sz_alignment);
1207
1208 try_at = last + alignment;
1209 while (1) {
1210 ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
1211 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
1212 if (ptr == (void *)-1)
1213 return NULL;
1214 if (ptr == try_at)
1215 break;
1216
1217 munmap(ptr, sz);
1218 try_at += alignment;
1219#ifdef __i386__
1220 /*
1221 * This isn't quite correct for 32-bit binaries
1222 * on 64-bit kernels since they can use the
1223 * entire 32-bit address space, but it's close
1224 * enough.
1225 */
1226 if (try_at > (void *)0xC0000000)
1227#else
1228 if (try_at > (void *)0x0000800000000000)
1229#endif
1230 try_at = (void *)0x0;
1231 if (!(++tries % 10000))
1232 dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
1233 continue;
1234 }
1235 last = ptr;
1236 dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
1237 return ptr;
1238}
1239void mpx_mini_free(void *ptr, long sz)
1240{
1241 dprintf2("%s() ptr: %p\n", __func__, ptr);
1242 if ((unsigned long)ptr > 0x100000000000) {
1243 dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
1244 test_failed();
1245 }
1246 sz = align_up(sz, sz_alignment);
1247 dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
1248 munmap(ptr, sz);
1249 dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
1250}
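
A minimal usage sketch (hypothetical size; pass the same sz to both calls, since each side independently rounds it up with align_up() so the munmap() covers the whole mapping):

	void *p = mpx_mini_alloc(5000);	/* rounded up to sz_alignment internally */
	if (p)
		mpx_mini_free(p, 5000);	/* rounds up the same way, unmaps it all */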
1251
1252#define NR_MALLOCS 100
1253struct one_malloc {
1254 char *ptr;
1255 int nr_filled_btes;
1256 unsigned long size;
1257};
1258struct one_malloc mallocs[NR_MALLOCS];
1259
1260void free_one_malloc(int index)
1261{
1262 unsigned long free_ptr;
1263 unsigned long mask;
1264
1265 if (!mallocs[index].ptr)
1266 return;
1267
1268 mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
1269 dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
1270
1271 free_ptr = (unsigned long)mallocs[index].ptr;
1272 mask = alignment-1;
1273 dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
1274 (free_ptr & mask), mask);
1275 assert((free_ptr & mask) == 0);
1276
1277 mallocs[index].ptr = NULL;
1278}
1279
1280#ifdef __i386__
1281#define MPX_BOUNDS_TABLE_COVERS 4096
1282#else
1283#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
1284#endif
1285void zap_everything(void)
1286{
1287 long after_zap;
1288 long before_zap;
1289 int i;
1290
1291 before_zap = inspect_me(bounds_dir_ptr);
1292 dprintf1("zapping everything start: %ld\n", before_zap);
1293 for (i = 0; i < NR_MALLOCS; i++)
1294 free_one_malloc(i);
1295
1296 after_zap = inspect_me(bounds_dir_ptr);
1297 dprintf1("zapping everything done: %ld\n", after_zap);
1298 /*
1299 * We only guarantee to empty the thing out if our allocations are
1300 * exactly aligned on the boundaries of a bounds table.
1301 */
1302 if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
1303 (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
1304 if (after_zap != 0)
1305 test_failed();
1306
1307 assert(after_zap == 0);
1308 }
1309}
1310
1311void do_one_malloc(void)
1312{
1313 static int malloc_counter;
1314 long sz;
1315 int rand_index = (mpx_random() % NR_MALLOCS);
1316 void *ptr = mallocs[rand_index].ptr;
1317
1318 dprintf3("%s() enter\n", __func__);
1319
1320 if (ptr) {
1321 dprintf3("freeing one malloc at index: %d\n", rand_index);
1322 free_one_malloc(rand_index);
1323 if (mpx_random() % (NR_MALLOCS*3) == 3) {
1324 int i;
1325 dprintf3("zapping some more\n");
1326 for (i = rand_index; i < NR_MALLOCS; i++)
1327 free_one_malloc(i);
1328 }
1329 if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
1330 zap_everything();
1331 }
1332
1333 /* 1->~1M */
1334 sz = (1 + mpx_random() % 1000) * 1000;
1335 ptr = mpx_mini_alloc(sz);
1336 if (!ptr) {
1337 /*
1338 * If we are failing allocations, just assume we
1339 * are out of memory and zap everything.
1340 */
1341 dprintf3("zapping everything because out of memory\n");
1342 zap_everything();
1343 goto out;
1344 }
1345
1346 dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
1347 mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
1348 mallocs[rand_index].ptr = ptr;
1349 mallocs[rand_index].size = sz;
1350out:
1351 if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
1352 inspect_me(bounds_dir_ptr);
1353}
1354
1355void run_timed_test(void (*test_func)(void))
1356{
1357 int done = 0;
1358 long iteration = 0;
1359 static time_t last_print;
1360 time_t now;
1361 time_t start;
1362
1363 time(&start);
1364 while (!done) {
1365 time(&now);
1366 if ((now - start) > TEST_DURATION_SECS)
1367 done = 1;
1368
1369 test_func();
1370 iteration++;
1371
1372 if ((now - last_print > 1) || done) {
1373 printf("iteration %ld complete, OK so far\n", iteration);
1374 last_print = now;
1375 }
1376 }
1377}
1378
1379void check_bounds_table_frees(void)
1380{
1381 printf("executing unmaptest\n");
1382 inspect_me(bounds_dir_ptr);
1383 run_timed_test(&do_one_malloc);
1384 printf("done with malloc() fun\n");
1385}
1386
1387void insn_test_failed(int test_nr, int test_round, void *buf,
1388 void *buf_shadow, void *ptr)
1389{
1390 print_context(xsave_test_buf);
1391 eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
1392 while (test_nr == 5) {
1393 struct mpx_bt_entry *bte;
1394 struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
1395 struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
1396
1397 printf(" bd: %p\n", bd);
1398 printf("&bde: %p\n", bde);
1399 printf("*bde: %lx\n", *(unsigned long *)bde);
1400 if (!bd_entry_valid(bde))
1401 break;
1402
1403 bte = mpx_vaddr_to_bt_entry(buf, bd);
1404 printf(" bte: %p\n", bte);
1405 printf("bte[0]: %lx\n", bte->contents[0]);
1406 printf("bte[1]: %lx\n", bte->contents[1]);
1407 printf("bte[2]: %lx\n", bte->contents[2]);
1408 printf("bte[3]: %lx\n", bte->contents[3]);
1409 break;
1410 }
1411 test_failed();
1412}
1413
1414void check_mpx_insns_and_tables(void)
1415{
1416 int successes = 0;
1417 int failures = 0;
1418 int buf_size = (1024*1024);
1419 unsigned long *buf = malloc(buf_size);
1420 const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
1421 int i, j;
1422
1423 memset(buf, 0, buf_size);
1424 memset(buf_shadow, 0, sizeof(buf_shadow));
1425
1426 for (i = 0; i < TEST_ROUNDS; i++) {
1427 uint8_t *ptr = get_random_addr() + 8;
1428
1429 for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
1430 if (0 && j != 5) {
1431 successes++;
1432 continue;
1433 }
1434 dprintf2("starting test %d round %d\n", j, i);
1435 dprint_context(xsave_test_buf);
1436 /*
1437 * test5 loads an address from the bounds tables.
1438 * The load will only complete if 'ptr' matches
1439 * the load and the store, so with random addrs,
1440 * the odds of this are very small. Make it
1441 * higher by only moving 'ptr' 1/10 times.
1442 */
1443 if (random() % 10 <= 0)
1444 ptr = get_random_addr() + 8;
1445 dprintf3("random ptr{%p}\n", ptr);
1446 dprint_context(xsave_test_buf);
1447 run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
1448 dprint_context(xsave_test_buf);
1449 if (!compare_context(xsave_test_buf)) {
1450 insn_test_failed(j, i, buf, buf_shadow, ptr);
1451 failures++;
1452 goto exit;
1453 }
1454 successes++;
1455 dprint_context(xsave_test_buf);
1456 dprintf2("finished test %d round %d\n", j, i);
1457 dprintf3("\n");
1458 dprint_context(xsave_test_buf);
1459 }
1460 }
1461
1462exit:
1463 dprintf2("\nabout to free:\n");
1464 free(buf);
1465 dprintf1("successes: %d\n", successes);
1466 dprintf1(" failures: %d\n", failures);
1467 dprintf1(" tests: %d\n", total_nr_tests);
1468 dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
1469 dprintf1(" saw: %d #BRs\n", br_count);
1470 if (failures) {
1471 eprintf("ERROR: non-zero number of failures\n");
1472 exit(20);
1473 }
1474 if (successes != total_nr_tests) {
1475 eprintf("ERROR: succeeded fewer times than the number of tests (%d != %d)\n",
1476 successes, total_nr_tests);
1477 exit(21);
1478 }
1479 if (num_upper_brs + num_lower_brs != br_count) {
1480 eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
1481 num_upper_brs, num_lower_brs, br_count);
1482 eprintf("successes: %d\n", successes);
1483 eprintf(" failures: %d\n", failures);
1484 eprintf(" tests: %d\n", total_nr_tests);
1485 eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
1486 eprintf(" saw: %d #BRs\n", br_count);
1487 exit(22);
1488 }
1489}
1490
1491/*
1492 * This is supposed to SIGSEGV nicely once the kernel
1493 * can no longer allocate vaddr space.
1494 */
1495void exhaust_vaddr_space(void)
1496{
1497 unsigned long ptr;
1498 /* Try to make sure there is no room for a bounds table anywhere */
1499 unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
1500#ifdef __i386__
1501 unsigned long max_vaddr = 0xf7788000UL;
1502#else
1503 unsigned long max_vaddr = 0x800000000000UL;
1504#endif
1505
1506 dprintf1("%s() start\n", __func__);
1507 /* do not start at 0, we aren't allowed to map there */
1508 for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
1509 void *ptr_ret;
1510 int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
1511
1512 if (!ret) {
1513 dprintf1("madvise() %lx ret: %d\n", ptr, ret);
1514 continue;
1515 }
1516 ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
1517 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
1518 if (ptr_ret != (void *)ptr) {
1519 perror("mmap");
1520 dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
1521 break;
1522 }
1523 if (!(ptr & 0xffffff))
1524 dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
1525 }
1526 for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
1527 dprintf2("covering 0x%lx with bounds table entries\n", ptr);
1528 cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
1529 }
1530 dprintf1("%s() end\n", __func__);
1531 printf("done with vaddr space fun\n");
1532}
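
Note the trick in the first loop: madvise(MADV_NORMAL) doubles as a cheap "is this range mapped?" probe, since it fails with ENOMEM on unmapped ranges. A zero return therefore means something already lives at ptr, and the loop skips ahead instead of mmap()ing over it.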
1533
1534void mpx_table_test(void)
1535{
1536 printf("starting mpx bounds table test\n");
1537 run_timed_test(check_mpx_insns_and_tables);
1538 printf("done with mpx bounds table test\n");
1539}
1540
1541int main(int argc, char **argv)
1542{
1543 int unmaptest = 0;
1544 int vaddrexhaust = 0;
1545 int tabletest = 0;
1546 int i;
1547
1548 check_mpx_support();
1549 mpx_prepare();
1550 srandom(11179);
1551
1552 bd_incore();
1553 init();
1554 bd_incore();
1555
1556 trace_me();
1557
1558 xsave_state((void *)xsave_test_buf, 0x1f);
1559 if (!compare_context(xsave_test_buf))
1560 printf("Init failed\n");
1561
1562 for (i = 1; i < argc; i++) {
1563 if (!strcmp(argv[i], "unmaptest"))
1564 unmaptest = 1;
1565 if (!strcmp(argv[i], "vaddrexhaust"))
1566 vaddrexhaust = 1;
1567 if (!strcmp(argv[i], "tabletest"))
1568 tabletest = 1;
1569 }
1570 if (!(unmaptest || vaddrexhaust || tabletest)) {
1571 unmaptest = 1;
1572 /* vaddrexhaust = 1; */
1573 tabletest = 1;
1574 }
1575 if (unmaptest)
1576 check_bounds_table_frees();
1577 if (tabletest)
1578 mpx_table_test();
1579 if (vaddrexhaust)
1580 exhaust_vaddr_space();
1581 printf("%s completed successfully\n", argv[0]);
1582 exit(0);
1583}
1584
1585#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
new file mode 100644
index 000000000000..af706a5398f7
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mm.h
@@ -0,0 +1,9 @@
1#ifndef _MPX_MM_H
2#define _MPX_MM_H
3
4#define PAGE_SIZE 4096
5#define MB (1UL<<20)
6
7extern long nr_incore(void *ptr, unsigned long size_bytes);
8
9#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000000..bf0d687c7db7
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,111 @@
1/*
2 * 32-bit test to check vDSO mremap.
3 *
4 * Copyright (c) 2016 Dmitry Safonov
5 * Suggested-by: Andrew Lutomirski
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16/*
17 * Can be built statically:
18 * gcc -Os -Wall -static -m32 test_mremap_vdso.c
19 */
20#define _GNU_SOURCE
21#include <stdio.h>
22#include <errno.h>
23#include <unistd.h>
24#include <string.h>
25
26#include <sys/mman.h>
27#include <sys/auxv.h>
28#include <sys/syscall.h>
29#include <sys/wait.h>
30
31#define PAGE_SIZE 4096
32
33static int try_to_remap(void *vdso_addr, unsigned long size)
34{
35 void *dest_addr, *new_addr;
36
37 /* Searching for memory location where to remap */
38 dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
39 if (dest_addr == MAP_FAILED) {
40 printf("[WARN]\tmmap failed (%d): %m\n", errno);
41 return 0;
42 }
43
44 printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
45 vdso_addr, (unsigned long)vdso_addr + size,
46 dest_addr, (unsigned long)dest_addr + size);
47 fflush(stdout);
48
49 new_addr = mremap(vdso_addr, size, size,
50 MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
51 if ((unsigned long)new_addr == (unsigned long)-1) {
52 munmap(dest_addr, size);
53 if (errno == EINVAL) {
54 printf("[NOTE]\tvDSO partial move failed, will retry with a bigger size\n");
55 return -1; /* Retry with larger */
56 }
57 printf("[FAIL]\tmremap failed (%d): %m\n", errno);
58 return 1;
59 }
60
61 return 0;
62
63}
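
To summarize the return protocol: try_to_remap() returns 0 on success, -1 when the kernel rejects a partial vDSO move with EINVAL (the caller then retries with a larger size), and 1 on any hard failure.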
64
65int main(int argc, char **argv, char **envp)
66{
67 pid_t child;
68
69 child = fork();
70 if (child == -1) {
71 printf("[WARN]\tfailed to fork (%d): %m\n", errno);
72 return 1;
73 }
74
75 if (child == 0) {
76 unsigned long vdso_size = PAGE_SIZE;
77 unsigned long auxval;
78 int ret = -1;
79
80 auxval = getauxval(AT_SYSINFO_EHDR);
81 printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
82 if (!auxval || auxval == -ENOENT) {
83 printf("[WARN]\tgetauxval failed\n");
84 return 0;
85 }
86
87 /* Simpler than parsing ELF header */
88 while (ret < 0) {
89 ret = try_to_remap((void *)auxval, vdso_size);
90 vdso_size += PAGE_SIZE;
91 }
92
93 /* Glibc is likely to explode now - exit with raw syscall */
94 asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
95 } else {
96 int status;
97
98 if (waitpid(child, &status, 0) != child ||
99 !WIFEXITED(status)) {
100 printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
101 return 1;
102 } else if (WEXITSTATUS(status) != 0) {
103 printf("[FAIL]\tChild failed with %d\n",
104 WEXITSTATUS(status));
105 return 1;
106 }
107 printf("[OK]\n");
108 }
109
110 return 0;
111}
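
For reference, the "parsing ELF header" alternative that the comment above the retry loop alludes to might look roughly like this. This is only a sketch, not part of the patch: vdso_size_from_ehdr() is a hypothetical helper, it reuses the file's PAGE_SIZE, and it assumes the vDSO is an ET_DYN object whose PT_LOAD p_vaddr values are offsets from the mapping base:

	#include <elf.h>
	#include <link.h>

	/* Sketch: derive the vDSO mapping size from its program headers. */
	static unsigned long vdso_size_from_ehdr(unsigned long auxval)
	{
		ElfW(Ehdr) *ehdr = (ElfW(Ehdr) *)auxval;
		ElfW(Phdr) *phdr = (ElfW(Phdr) *)(auxval + ehdr->e_phoff);
		unsigned long end = 0;
		int i;

		for (i = 0; i < ehdr->e_phnum; i++)
			if (phdr[i].p_type == PT_LOAD &&
			    phdr[i].p_vaddr + phdr[i].p_memsz > end)
				end = phdr[i].p_vaddr + phdr[i].p_memsz;

		/* Round up to whole pages, matching what mremap() expects. */
		return (end + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1UL);
	}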