-rw-r--r--  Documentation/x86/intel_mpx.txt            | 234
-rw-r--r--  arch/ia64/include/uapi/asm/siginfo.h       | 8
-rw-r--r--  arch/mips/include/uapi/asm/siginfo.h       | 4
-rw-r--r--  arch/s390/include/asm/mmu_context.h        | 11
-rw-r--r--  arch/um/include/asm/mmu_context.h          | 24
-rw-r--r--  arch/unicore32/include/asm/mmu_context.h   | 11
-rw-r--r--  arch/x86/Kconfig                           | 4
-rw-r--r--  arch/x86/include/asm/disabled-features.h   | 8
-rw-r--r--  arch/x86/include/asm/insn.h                | 10
-rw-r--r--  arch/x86/include/asm/mmu_context.h         | 26
-rw-r--r--  arch/x86/include/asm/mpx.h                 | 103
-rw-r--r--  arch/x86/include/asm/paravirt.h            | 16
-rw-r--r--  arch/x86/include/asm/processor.h           | 33
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  | 17
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 25
-rw-r--r--  arch/x86/kernel/kprobes/core.c             | 8
-rw-r--r--  arch/x86/kernel/kprobes/opt.c              | 4
-rw-r--r--  arch/x86/kernel/setup.c                    | 2
-rw-r--r--  arch/x86/kernel/traps.c                    | 85
-rw-r--r--  arch/x86/kernel/uprobes.c                  | 2
-rw-r--r--  arch/x86/lib/insn.c                        | 5
-rw-r--r--  arch/x86/mm/Makefile                       | 2
-rw-r--r--  arch/x86/mm/mpx.c                          | 928
-rw-r--r--  arch/x86/tools/insn_sanity.c               | 2
-rw-r--r--  arch/x86/tools/test_get_len.c              | 2
-rw-r--r--  fs/exec.c                                  | 1
-rw-r--r--  fs/proc/task_mmu.c                         | 3
-rw-r--r--  include/asm-generic/mm_hooks.h             | 17
-rw-r--r--  include/linux/mm.h                         | 6
-rw-r--r--  include/linux/mm_types.h                   | 4
-rw-r--r--  include/uapi/asm-generic/siginfo.h         | 9
-rw-r--r--  include/uapi/linux/prctl.h                 | 6
-rw-r--r--  kernel/signal.c                            | 4
-rw-r--r--  kernel/sys.c                               | 12
-rw-r--r--  mm/mmap.c                                  | 2
35 files changed, 1591 insertions, 47 deletions
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
new file mode 100644
index 000000000000..4472ed2ad921
--- /dev/null
+++ b/Documentation/x86/intel_mpx.txt
@@ -0,0 +1,234 @@
11. Intel(R) MPX Overview
2========================
3
4Intel(R) Memory Protection Extensions (Intel(R) MPX) is a new capability
5introduced into Intel Architecture. Intel MPX provides hardware features
6that can be used in conjunction with compiler changes to check memory
7references, catching those references whose compile-time intentions are
8usurped at runtime by buffer overflow or underflow.
9
10For more information, please refer to Intel(R) Architecture Instruction
11Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
12Extensions.
13
14Note: Currently no hardware with MPX ISA is available, but it is always
15possible to use SDE (Intel(R) Software Development Emulator) instead, which
16can be downloaded from
17http://software.intel.com/en-us/articles/intel-software-development-emulator
18
19
202. How to take advantage of MPX
21===============================
22
23For MPX to work, changes are required in the kernel, binutils and compiler.
24No source changes are required for applications, just a recompile.
25
26There are a lot of moving parts that all have to work right. The following
27is how we expect the compiler, application and kernel to work together.
28
291) Application developer compiles with -fmpx. The compiler will add the
30 instrumentation as well as some setup code called early after the app
31 starts. New instruction prefixes are noops for old CPUs.
322) That setup code allocates (virtual) space for the "bounds directory",
33 points the "bndcfgu" register to the directory and notifies the kernel
34 (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using
35 MPX.
363) The kernel detects that the CPU has MPX, allows the new prctl() to
37 succeed, and notes the location of the bounds directory. Userspace is
38 expected to keep the bounds directory at that location. We note it
39 instead of reading it each time because the 'xsave' operation needed
40 to access the bounds directory register is expensive.
414) If the application needs to spill bounds out of the 4 registers, it
42 issues a bndstx instruction. Since the bounds directory is empty at
43 this point, a bounds fault (#BR) is raised, the kernel allocates a
44 bounds table (in the user address space) and makes the relevant entry
45 in the bounds directory point to the new table.
465) If the application violates the bounds specified in the bounds registers,
47 a separate kind of #BR is raised which will deliver a signal with
48 information about the violation in the 'struct siginfo'.
496) Whenever memory is freed, we know that it can no longer contain valid
50 pointers, and we attempt to free the associated space in the bounds
51 tables. If an entire table becomes unused, we will attempt to free
52 the table and remove the entry in the directory.
53
54To summarize, there are essentially three things interacting here:
55
56GCC with -fmpx:
57 * enables annotation of code with MPX instructions and prefixes
58 * inserts code early in the application to call in to the "gcc runtime"
59GCC MPX Runtime:
60 * Checks for hardware MPX support in the cpuid leaf
61 * allocates virtual space for the bounds directory (malloc() essentially)
62 * points the hardware BNDCFGU register at the directory
63 * calls a new prctl(PR_MPX_ENABLE_MANAGEMENT) to notify the kernel to
64 start managing the bounds directories (sketched after this list)
65Kernel MPX Code:
66 * Checks for hardware MPX support in the cpuid leaf
67 * Handles #BR exceptions and sends SIGSEGV to the app when it violates
68 bounds, like during a buffer overflow.
69 * When bounds are spilled into an unallocated bounds table, the kernel
70 notices in the #BR exception, allocates the virtual space, then
71 updates the bounds directory to point to the new table. It keeps
72 special track of the memory with a VM_MPX flag.
73 * Frees unused bounds tables at the time that the memory they described
74 is unmapped.
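
As a concrete illustration of the runtime side, here is a minimal
userspace sketch. It is not part of this patch: the function name is
hypothetical, and the locally-defined MPX_BD_SIZE_BYTES mirrors the
64-bit value from the asm/mpx.h hunk below.

	#include <stdio.h>
	#include <sys/mman.h>
	#include <sys/prctl.h>

	#ifndef PR_MPX_ENABLE_MANAGEMENT
	#define PR_MPX_ENABLE_MANAGEMENT	43	/* value added by this patch */
	#endif
	#define MPX_BD_SIZE_BYTES		(1UL << 31)	/* 2GB, 64-bit mode */

	static int mpx_runtime_setup(void)
	{
		/* Allocate (virtual) space for the bounds directory. */
		void *bd = mmap(NULL, MPX_BD_SIZE_BYTES, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
		if (bd == MAP_FAILED)
			return -1;
		/*
		 * A real runtime would now point BNDCFGU at 'bd' via the
		 * XSAVE/XRSTOR state (not expressible in plain C) *before*
		 * making the prctl() call below, since the kernel reads
		 * the directory base out of BNDCFGU at that point.
		 */
		if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0)) {
			perror("PR_MPX_ENABLE_MANAGEMENT");
			return -1;
		}
		return 0;
	}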
75
76
773. How the MPX kernel code works
78================================
79
80Handling #BR faults caused by MPX
81---------------------------------
82
83When MPX is enabled, there are two new situations that can generate
84#BR faults:
85 * a new bounds table (BT) needs to be allocated to save bounds.
86 * a bounds violation is caused by an MPX instruction.
87
88We hook the #BR handler to handle these two new situations.
89
90On-demand kernel allocation of bounds tables
91--------------------------------------------
92
93MPX only has 4 hardware registers for storing bounds information. If
94MPX-enabled code needs more than these 4 registers, it needs to spill
95them somewhere. It has two special instructions for this which allow
96the bounds to be moved between the bounds registers and some new "bounds
97tables".
98
99#BR exceptions are a new class of exceptions just for MPX. They are
100conceptually similar to a page fault and will be raised by the MPX
101hardware both during bounds violations and when the tables are not
102present. The kernel handles those #BR exceptions for not-present tables
103by carving the space out of the normal process's address space and then
104pointing the bounds directory over to it.
105
106The tables need to be accessed and controlled by userspace because
107the instructions for moving bounds in and out of them are extremely
108frequent. They potentially happen every time a register points to
109memory. Any direct kernel involvement (like a syscall) to access the
110tables would obviously destroy performance.
111
112Why not do this in userspace? MPX does not strictly require anything in
113the kernel. It can theoretically be done completely from userspace. Here
114are a few ways this could be done. We don't think any of them are practical
115in the real world, but here they are.
116
117Q: Can virtual space simply be reserved for the bounds tables so that we
118 never have to allocate them?
119A: An MPX-enabled application may create a lot of bounds tables in its
120 process address space to save bounds information. These tables can take
121 up huge swaths of memory (as much as 80% of the memory on the system)
122 even if we clean them up aggressively. In the worst-case scenario, the
123 tables can be 4x the size of the data structure being tracked. IOW, a
124 1-page structure can require 4 bounds-table pages. An X-GB virtual
125 area needs 4*X GB of virtual space, plus 2GB for the bounds directory.
126 If we were to preallocate them for the 128TB of user virtual address
127 space, we would need to reserve 512TB+2GB, which is larger than the
128 entire virtual address space today. This means they cannot be reserved
129 ahead of time. Also, a single process's pre-populated bounds directory
130 consumes 2GB of virtual *AND* physical memory. IOW, it's completely
131 infeasible to prepopulate bounds directories.
132
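A quick back-of-the-envelope check of this worst case against the
64-bit constants from the asm/mpx.h hunk below. The small program is
illustrative only, not part of the patch:

	#include <assert.h>

	#define MPX_BD_ENTRY_OFFSET	28	/* 64-bit values from asm/mpx.h */
	#define MPX_BD_ENTRY_SHIFT	3
	#define MPX_BT_ENTRY_OFFSET	17
	#define MPX_BT_ENTRY_SHIFT	5
	#define MPX_IGN_BITS		3

	int main(void)
	{
		/* one bounds directory: 2^(28+3) bytes = 2GB of virtual space */
		assert((1UL << (MPX_BD_ENTRY_OFFSET + MPX_BD_ENTRY_SHIFT)) == 2UL << 30);
		/* each bounds table: 2^(17+5) bytes = 4MB */
		assert((1UL << (MPX_BT_ENTRY_OFFSET + MPX_BT_ENTRY_SHIFT)) == 4UL << 20);
		/*
		 * Every 2^MPX_IGN_BITS = 8 bytes of tracked data needs a
		 * 2^MPX_BT_ENTRY_SHIFT = 32-byte table entry: the 4x
		 * blowup described above.
		 */
		return 0;
	}
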
133Q: Can we preallocate bounds table space at the same time memory is
134 allocated which might contain pointers that might eventually need
135 bounds tables?
136A: This would work if we could hook the site of each and every memory
137 allocation syscall. This can be done for small, constrained applications.
138 But, it isn't practical at a larger scale since a given app has no
139 way of controlling how all the parts of the app might allocate memory
140 (think libraries). The kernel is really the only place to intercept
141 these calls.
142
143Q: Could a bounds fault be handed to userspace and the tables allocated
144 there in a signal handler instead of in the kernel?
145A: mmap() is not on the list of async-signal-safe functions, and even
146 if it were, it would still require locking or nasty tricks to
147 keep track of the allocation state there.
148
149Having ruled out all of the userspace-only approaches for managing
150bounds tables that we could think of, we create them on demand in
151the kernel.
152
153Decoding MPX instructions
154-------------------------
155
156If a #BR is generated due to a bounds violation caused by MPX,
157we need to decode the MPX instruction to get the violation address and
158store that address in the extended struct siginfo.
159
160The _sigfault field of struct siginfo is extended as follows:
161
16287 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
16388 struct {
16489 void __user *_addr; /* faulting insn/memory ref. */
16590 #ifdef __ARCH_SI_TRAPNO
16691 int _trapno; /* TRAP # which caused the signal */
16792 #endif
16893 short _addr_lsb; /* LSB of the reported address */
16994 struct {
17095 void __user *_lower;
17196 void __user *_upper;
17297 } _addr_bnd;
17398 } _sigfault;
174
175The '_addr' field refers to the violation address, and the new '_addr_bnd'
176field refers to the upper/lower bounds when a #BR is raised.
177
178Glibc will also be updated to support this new siginfo, so users
179can get the violation address and bounds when bounds violations occur.
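
Once glibc exposes the new fields, a handler could report them roughly
like this. This is a sketch under that assumption: SEGV_BNDERR and the
si_lower/si_upper names come from this patch, the userspace spellings
depend on the eventual glibc support, and fprintf() is used only for
demonstration since it is not async-signal-safe:

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	#ifndef SEGV_BNDERR
	#define SEGV_BNDERR	3	/* failed address bound checks, from this patch */
	#endif

	static void br_handler(int sig, siginfo_t *si, void *ctx)
	{
		if (si->si_code == SEGV_BNDERR)	/* new si_code for #BR */
			fprintf(stderr, "bounds violation at %p, bounds [%p, %p]\n",
				si->si_addr, si->si_lower, si->si_upper);
		_exit(1);
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction	= br_handler,
			.sa_flags	= SA_SIGINFO,
		};
		sigaction(SIGSEGV, &sa, NULL);
		/* ... run MPX-instrumented code here ... */
		return 0;
	}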
180
181Cleanup unused bounds tables
182----------------------------
183
184When a BNDSTX instruction attempts to save bounds to a bounds directory
185entry marked as invalid, a #BR is generated. This is an indication that
186no bounds table exists for this entry. In this case the fault handler
187will allocate a new bounds table on demand.
188
189Since the kernel allocated those tables on-demand without userspace
190knowledge, it is also responsible for freeing them when the associated
191mappings go away.
192
193The solution for this issue is to hook do_munmap() and check
194whether the process is MPX enabled. If it is, any bounds tables covering
195the virtual address region being unmapped will be freed as well.
196
197Adding new prctl commands
198-------------------------
199
200Two new prctl commands are added to enable and disable MPX bounds-table
201management in the kernel.
202
203155 #define PR_MPX_ENABLE_MANAGEMENT 43
204156 #define PR_MPX_DISABLE_MANAGEMENT 44
205
206The runtime library in userspace is responsible for allocating the
207bounds directory, so the kernel has to use the XSAVE instruction to
208get the base of the bounds directory from the BNDCFGU register.
209
210But XSAVE is expected to be very expensive. As a performance
211optimization, we read the base of the bounds directory once, during
212PR_MPX_ENABLE_MANAGEMENT, and cache it in struct mm_struct for
213later use.
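
Taken together, a process could exercise both commands like this
(illustrative only; per this patch the kernel rejects non-zero extra
arguments with -EINVAL and returns -ENXIO when MPX is unavailable or
BNDCFGU has not been enabled yet):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_MPX_ENABLE_MANAGEMENT	/* values added by this patch */
	#define PR_MPX_ENABLE_MANAGEMENT	43
	#define PR_MPX_DISABLE_MANAGEMENT	44
	#endif

	int main(void)
	{
		if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0))
			perror("PR_MPX_ENABLE_MANAGEMENT");

		/* ... MPX-instrumented work ... */

		if (prctl(PR_MPX_DISABLE_MANAGEMENT, 0, 0, 0, 0))
			perror("PR_MPX_DISABLE_MANAGEMENT");
		return 0;
	}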
214
215
2164. Special rules
217================
218
2191) If userspace is requesting help from the kernel to manage its bounds
220tables, it may not create or modify entries in the bounds directory.
221
222Certainly users can allocate bounds tables and forcibly point the bounds
223directory at them through the XSAVE/XRSTOR mechanism, and then set the
224valid bit of a directory entry to make it valid. But the kernel will
225decline to assist in managing these tables.
226
2272) Userspace may not take multiple bounds directory entries and point
228them at the same bounds table.
229
230This is allowed architecturally. For more information, see the "Intel(R)
231Architecture Instruction Set Extensions Programming Reference" (9.3.4).
232
233However, if users did this, the kernel might be fooled into unmapping an
234in-use bounds table, since it does not recognize sharing.
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
index 4ea6225196bb..bce9bc1a66c4 100644
--- a/arch/ia64/include/uapi/asm/siginfo.h
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -63,6 +63,10 @@ typedef struct siginfo {
63 unsigned int _flags; /* see below */ 63 unsigned int _flags; /* see below */
64 unsigned long _isr; /* isr */ 64 unsigned long _isr; /* isr */
65 short _addr_lsb; /* lsb of faulting address */ 65 short _addr_lsb; /* lsb of faulting address */
66 struct {
67 void __user *_lower;
68 void __user *_upper;
69 } _addr_bnd;
66 } _sigfault; 70 } _sigfault;
67 71
68 /* SIGPOLL */ 72 /* SIGPOLL */
@@ -110,9 +114,9 @@ typedef struct siginfo {
110/* 114/*
111 * SIGSEGV si_codes 115 * SIGSEGV si_codes
112 */ 116 */
113#define __SEGV_PSTKOVF (__SI_FAULT|3) /* paragraph stack overflow */ 117#define __SEGV_PSTKOVF (__SI_FAULT|4) /* paragraph stack overflow */
114#undef NSIGSEGV 118#undef NSIGSEGV
115#define NSIGSEGV 3 119#define NSIGSEGV 4
116 120
117#undef NSIGTRAP 121#undef NSIGTRAP
118#define NSIGTRAP 4 122#define NSIGTRAP 4
diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h
index e81174432bab..d08f83f19db5 100644
--- a/arch/mips/include/uapi/asm/siginfo.h
+++ b/arch/mips/include/uapi/asm/siginfo.h
@@ -92,6 +92,10 @@ typedef struct siginfo {
92 int _trapno; /* TRAP # which caused the signal */ 92 int _trapno; /* TRAP # which caused the signal */
93#endif 93#endif
94 short _addr_lsb; 94 short _addr_lsb;
95 struct {
96 void __user *_lower;
97 void __user *_upper;
98 } _addr_bnd;
95 } _sigfault; 99 } _sigfault;
96 100
97 /* SIGPOLL, SIGXFSZ (To do ...) */ 101 /* SIGPOLL, SIGXFSZ (To do ...) */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 3815bfea1b2d..f49b71954654 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -120,4 +120,15 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
120{ 120{
121} 121}
122 122
123static inline void arch_unmap(struct mm_struct *mm,
124 struct vm_area_struct *vma,
125 unsigned long start, unsigned long end)
126{
127}
128
129static inline void arch_bprm_mm_init(struct mm_struct *mm,
130 struct vm_area_struct *vma)
131{
132}
133
123#endif /* __S390_MMU_CONTEXT_H */ 134#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index aa4a743dc4ab..941527e507f7 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -10,7 +10,26 @@
10#include <asm/mmu.h> 10#include <asm/mmu.h>
11 11
12extern void uml_setup_stubs(struct mm_struct *mm); 12extern void uml_setup_stubs(struct mm_struct *mm);
13/*
14 * Needed since we do not use the asm-generic/mm_hooks.h:
15 */
16static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
17{
18 uml_setup_stubs(mm);
19}
13extern void arch_exit_mmap(struct mm_struct *mm); 20extern void arch_exit_mmap(struct mm_struct *mm);
21static inline void arch_unmap(struct mm_struct *mm,
22 struct vm_area_struct *vma,
23 unsigned long start, unsigned long end)
24{
25}
26static inline void arch_bprm_mm_init(struct mm_struct *mm,
27 struct vm_area_struct *vma)
28{
29}
30/*
31 * end asm-generic/mm_hooks.h functions
32 */
14 33
15#define deactivate_mm(tsk,mm) do { } while (0) 34#define deactivate_mm(tsk,mm) do { } while (0)
16 35
@@ -41,11 +60,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
41 } 60 }
42} 61}
43 62
44static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
45{
46 uml_setup_stubs(mm);
47}
48
49static inline void enter_lazy_tlb(struct mm_struct *mm, 63static inline void enter_lazy_tlb(struct mm_struct *mm,
50 struct task_struct *tsk) 64 struct task_struct *tsk)
51{ 65{
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index ef470a7a3d0f..1cb5220afaf9 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -86,4 +86,15 @@ static inline void arch_dup_mmap(struct mm_struct *oldmm,
86{ 86{
87} 87}
88 88
89static inline void arch_unmap(struct mm_struct *mm,
90 struct vm_area_struct *vma,
91 unsigned long start, unsigned long end)
92{
93}
94
95static inline void arch_bprm_mm_init(struct mm_struct *mm,
96 struct vm_area_struct *vma)
97{
98}
99
89#endif 100#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 41a503c15862..666ac6651c17 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -248,6 +248,10 @@ config HAVE_INTEL_TXT
248 def_bool y 248 def_bool y
249 depends on INTEL_IOMMU && ACPI 249 depends on INTEL_IOMMU && ACPI
250 250
251config X86_INTEL_MPX
252 def_bool y
253 depends on CPU_SUP_INTEL
254
251config X86_32_SMP 255config X86_32_SMP
252 def_bool y 256 def_bool y
253 depends on X86_32 && SMP 257 depends on X86_32 && SMP
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 97534a7d38e3..f226df064660 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,6 +10,12 @@
10 * cpu_feature_enabled(). 10 * cpu_feature_enabled().
11 */ 11 */
12 12
13#ifdef CONFIG_X86_INTEL_MPX
14# define DISABLE_MPX 0
15#else
16# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
17#endif
18
13#ifdef CONFIG_X86_64 19#ifdef CONFIG_X86_64
14# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) 20# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
15# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) 21# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
@@ -34,6 +40,6 @@
34#define DISABLED_MASK6 0 40#define DISABLED_MASK6 0
35#define DISABLED_MASK7 0 41#define DISABLED_MASK7 0
36#define DISABLED_MASK8 0 42#define DISABLED_MASK8 0
37#define DISABLED_MASK9 0 43#define DISABLED_MASK9 (DISABLE_MPX)
38 44
39#endif /* _ASM_X86_DISABLED_FEATURES_H */ 45#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 48eb30a86062..47f29b1d1846 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -65,6 +65,7 @@ struct insn {
65 unsigned char x86_64; 65 unsigned char x86_64;
66 66
67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */ 67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */
68 const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
68 const insn_byte_t *next_byte; 69 const insn_byte_t *next_byte;
69}; 70};
70 71
@@ -96,7 +97,7 @@ struct insn {
96#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ 97#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
97#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ 98#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
98 99
99extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); 100extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
100extern void insn_get_prefixes(struct insn *insn); 101extern void insn_get_prefixes(struct insn *insn);
101extern void insn_get_opcode(struct insn *insn); 102extern void insn_get_opcode(struct insn *insn);
102extern void insn_get_modrm(struct insn *insn); 103extern void insn_get_modrm(struct insn *insn);
@@ -115,12 +116,13 @@ static inline void insn_get_attribute(struct insn *insn)
115extern int insn_rip_relative(struct insn *insn); 116extern int insn_rip_relative(struct insn *insn);
116 117
117/* Init insn for kernel text */ 118/* Init insn for kernel text */
118static inline void kernel_insn_init(struct insn *insn, const void *kaddr) 119static inline void kernel_insn_init(struct insn *insn,
120 const void *kaddr, int buf_len)
119{ 121{
120#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
121 insn_init(insn, kaddr, 1); 123 insn_init(insn, kaddr, buf_len, 1);
122#else /* CONFIG_X86_32 */ 124#else /* CONFIG_X86_32 */
123 insn_init(insn, kaddr, 0); 125 insn_init(insn, kaddr, buf_len, 0);
124#endif 126#endif
125} 127}
126 128
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 166af2a8e865..be91d5736e08 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -10,9 +10,8 @@
10#include <asm/pgalloc.h> 10#include <asm/pgalloc.h>
11#include <asm/tlbflush.h> 11#include <asm/tlbflush.h>
12#include <asm/paravirt.h> 12#include <asm/paravirt.h>
13#include <asm/mpx.h>
13#ifndef CONFIG_PARAVIRT 14#ifndef CONFIG_PARAVIRT
14#include <asm-generic/mm_hooks.h>
15
16static inline void paravirt_activate_mm(struct mm_struct *prev, 15static inline void paravirt_activate_mm(struct mm_struct *prev,
17 struct mm_struct *next) 16 struct mm_struct *next)
18{ 17{
@@ -102,4 +101,27 @@ do { \
102} while (0) 101} while (0)
103#endif 102#endif
104 103
104static inline void arch_dup_mmap(struct mm_struct *oldmm,
105 struct mm_struct *mm)
106{
107 paravirt_arch_dup_mmap(oldmm, mm);
108}
109
110static inline void arch_exit_mmap(struct mm_struct *mm)
111{
112 paravirt_arch_exit_mmap(mm);
113}
114
115static inline void arch_bprm_mm_init(struct mm_struct *mm,
116 struct vm_area_struct *vma)
117{
118 mpx_mm_init(mm);
119}
120
121static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
122 unsigned long start, unsigned long end)
123{
124 mpx_notify_unmap(mm, vma, start, end);
125}
126
105#endif /* _ASM_X86_MMU_CONTEXT_H */ 127#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
new file mode 100644
index 000000000000..a952a13d59a7
--- /dev/null
+++ b/arch/x86/include/asm/mpx.h
@@ -0,0 +1,103 @@
1#ifndef _ASM_X86_MPX_H
2#define _ASM_X86_MPX_H
3
4#include <linux/types.h>
5#include <asm/ptrace.h>
6#include <asm/insn.h>
7
8/*
9 * NULL is theoretically a valid place to put the bounds
10 * directory, so point this at an invalid address.
11 */
12#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
13#define MPX_BNDCFG_ENABLE_FLAG 0x1
14#define MPX_BD_ENTRY_VALID_FLAG 0x1
15
16#ifdef CONFIG_X86_64
17
18/* upper 28 bits [47:20] of the virtual address in 64-bit used to
19 * index into bounds directory (BD).
20 */
21#define MPX_BD_ENTRY_OFFSET 28
22#define MPX_BD_ENTRY_SHIFT 3
23/* bits [19:3] of the virtual address in 64-bit used to index into
24 * bounds table (BT).
25 */
26#define MPX_BT_ENTRY_OFFSET 17
27#define MPX_BT_ENTRY_SHIFT 5
28#define MPX_IGN_BITS 3
29#define MPX_BD_ENTRY_TAIL 3
30
31#else
32
33#define MPX_BD_ENTRY_OFFSET 20
34#define MPX_BD_ENTRY_SHIFT 2
35#define MPX_BT_ENTRY_OFFSET 10
36#define MPX_BT_ENTRY_SHIFT 4
37#define MPX_IGN_BITS 2
38#define MPX_BD_ENTRY_TAIL 2
39
40#endif
41
42#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
43#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
44
45#define MPX_BNDSTA_TAIL 2
46#define MPX_BNDCFG_TAIL 12
47#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
48#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
49#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1))
50
51#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
52#define MPX_BNDSTA_ERROR_CODE 0x3
53
54#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
55#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
56#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
57 MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
58#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
59 MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
60
61#ifdef CONFIG_X86_INTEL_MPX
62siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
63 struct xsave_struct *xsave_buf);
64int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
65static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
66{
67 return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
68}
69static inline void mpx_mm_init(struct mm_struct *mm)
70{
71 /*
72 * NULL is theoretically a valid place to put the bounds
73 * directory, so point this at an invalid address.
74 */
75 mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
76}
77void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
78 unsigned long start, unsigned long end);
79#else
80static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
81 struct xsave_struct *xsave_buf)
82{
83 return NULL;
84}
85static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
86{
87 return -EINVAL;
88}
89static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
90{
91 return 0;
92}
93static inline void mpx_mm_init(struct mm_struct *mm)
94{
95}
96static inline void mpx_notify_unmap(struct mm_struct *mm,
97 struct vm_area_struct *vma,
98 unsigned long start, unsigned long end)
99{
100}
101#endif /* CONFIG_X86_INTEL_MPX */
102
103#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cd6e1610e29e..32444ae939ca 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -330,13 +330,13 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
330 PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); 330 PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
331} 331}
332 332
333static inline void arch_dup_mmap(struct mm_struct *oldmm, 333static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
334 struct mm_struct *mm) 334 struct mm_struct *mm)
335{ 335{
336 PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); 336 PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
337} 337}
338 338
339static inline void arch_exit_mmap(struct mm_struct *mm) 339static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
340{ 340{
341 PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); 341 PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
342} 342}
@@ -986,5 +986,15 @@ extern void default_banner(void);
986#endif /* __ASSEMBLY__ */ 986#endif /* __ASSEMBLY__ */
987#else /* CONFIG_PARAVIRT */ 987#else /* CONFIG_PARAVIRT */
988# define default_banner x86_init_noop 988# define default_banner x86_init_noop
989#ifndef __ASSEMBLY__
990static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
991 struct mm_struct *mm)
992{
993}
994
995static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
996{
997}
998#endif /* __ASSEMBLY__ */
989#endif /* !CONFIG_PARAVIRT */ 999#endif /* !CONFIG_PARAVIRT */
990#endif /* _ASM_X86_PARAVIRT_H */ 1000#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eb71ec794732..9617a1716813 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -374,13 +374,14 @@ struct lwp_struct {
374 u8 reserved[128]; 374 u8 reserved[128];
375}; 375};
376 376
377struct bndregs_struct { 377struct bndreg {
378 u64 bndregs[8]; 378 u64 lower_bound;
379 u64 upper_bound;
379} __packed; 380} __packed;
380 381
381struct bndcsr_struct { 382struct bndcsr {
382 u64 cfg_reg_u; 383 u64 bndcfgu;
383 u64 status_reg; 384 u64 bndstatus;
384} __packed; 385} __packed;
385 386
386struct xsave_hdr_struct { 387struct xsave_hdr_struct {
@@ -394,8 +395,8 @@ struct xsave_struct {
394 struct xsave_hdr_struct xsave_hdr; 395 struct xsave_hdr_struct xsave_hdr;
395 struct ymmh_struct ymmh; 396 struct ymmh_struct ymmh;
396 struct lwp_struct lwp; 397 struct lwp_struct lwp;
397 struct bndregs_struct bndregs; 398 struct bndreg bndreg[4];
398 struct bndcsr_struct bndcsr; 399 struct bndcsr bndcsr;
399 /* new processor state extensions will go here */ 400 /* new processor state extensions will go here */
400} __attribute__ ((packed, aligned (64))); 401} __attribute__ ((packed, aligned (64)));
401 402
@@ -953,6 +954,24 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
953extern int get_tsc_mode(unsigned long adr); 954extern int get_tsc_mode(unsigned long adr);
954extern int set_tsc_mode(unsigned int val); 955extern int set_tsc_mode(unsigned int val);
955 956
957/* Register/unregister a process' MPX related resource */
958#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk))
959#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk))
960
961#ifdef CONFIG_X86_INTEL_MPX
962extern int mpx_enable_management(struct task_struct *tsk);
963extern int mpx_disable_management(struct task_struct *tsk);
964#else
965static inline int mpx_enable_management(struct task_struct *tsk)
966{
967 return -EINVAL;
968}
969static inline int mpx_disable_management(struct task_struct *tsk)
970{
971 return -EINVAL;
972}
973#endif /* CONFIG_X86_INTEL_MPX */
974
956extern u16 amd_get_nb_id(int cpu); 975extern u16 amd_get_nb_id(int cpu);
957 976
958static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) 977static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 495ae9793628..3c895d480cd7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -724,6 +724,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
724 unsigned long ip = regs->ip; 724 unsigned long ip = regs->ip;
725 int is_64bit = 0; 725 int is_64bit = 0;
726 void *kaddr; 726 void *kaddr;
727 int size;
727 728
728 /* 729 /*
729 * We don't need to fixup if the PEBS assist is fault like 730 * We don't need to fixup if the PEBS assist is fault like
@@ -758,11 +759,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
758 return 1; 759 return 1;
759 } 760 }
760 761
762 size = ip - to;
761 if (!kernel_ip(ip)) { 763 if (!kernel_ip(ip)) {
762 int size, bytes; 764 int bytes;
763 u8 *buf = this_cpu_read(insn_buffer); 765 u8 *buf = this_cpu_read(insn_buffer);
764 766
765 size = ip - to; /* Must fit our buffer, see above */ 767 /* 'size' must fit our buffer, see above */
766 bytes = copy_from_user_nmi(buf, (void __user *)to, size); 768 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
767 if (bytes != 0) 769 if (bytes != 0)
768 return 0; 770 return 0;
@@ -780,11 +782,20 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
780#ifdef CONFIG_X86_64 782#ifdef CONFIG_X86_64
781 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); 783 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
782#endif 784#endif
783 insn_init(&insn, kaddr, is_64bit); 785 insn_init(&insn, kaddr, size, is_64bit);
784 insn_get_length(&insn); 786 insn_get_length(&insn);
787 /*
788 * Make sure there was not a problem decoding the
789 * instruction and getting the length. This is
790 * doubly important because we have an infinite
791 * loop if insn.length=0.
792 */
793 if (!insn.length)
794 break;
785 795
786 to += insn.length; 796 to += insn.length;
787 kaddr += insn.length; 797 kaddr += insn.length;
798 size -= insn.length;
788 } while (to < ip); 799 } while (to < ip);
789 800
790 if (to == ip) { 801 if (to == ip) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 45fa730a5283..58f1a94beaf0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -465,7 +465,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
465{ 465{
466 struct insn insn; 466 struct insn insn;
467 void *addr; 467 void *addr;
468 int bytes, size = MAX_INSN_SIZE; 468 int bytes_read, bytes_left;
469 int ret = X86_BR_NONE; 469 int ret = X86_BR_NONE;
470 int ext, to_plm, from_plm; 470 int ext, to_plm, from_plm;
471 u8 buf[MAX_INSN_SIZE]; 471 u8 buf[MAX_INSN_SIZE];
@@ -493,8 +493,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
493 return X86_BR_NONE; 493 return X86_BR_NONE;
494 494
495 /* may fail if text not present */ 495 /* may fail if text not present */
496 bytes = copy_from_user_nmi(buf, (void __user *)from, size); 496 bytes_left = copy_from_user_nmi(buf, (void __user *)from,
497 if (bytes != 0) 497 MAX_INSN_SIZE);
498 bytes_read = MAX_INSN_SIZE - bytes_left;
499 if (!bytes_read)
498 return X86_BR_NONE; 500 return X86_BR_NONE;
499 501
500 addr = buf; 502 addr = buf;
@@ -505,10 +507,19 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
505 * Ensure we don't blindy read any address by validating it is 507 * Ensure we don't blindy read any address by validating it is
506 * a known text address. 508 * a known text address.
507 */ 509 */
508 if (kernel_text_address(from)) 510 if (kernel_text_address(from)) {
509 addr = (void *)from; 511 addr = (void *)from;
510 else 512 /*
513 * Assume we can get the maximum possible size
514 * when grabbing kernel data. This is not
515 * _strictly_ true since we could possibly be
516 * executing up next to a memory hole, but
517 * it is very unlikely to be a problem.
518 */
519 bytes_read = MAX_INSN_SIZE;
520 } else {
511 return X86_BR_NONE; 521 return X86_BR_NONE;
522 }
512 } 523 }
513 524
514 /* 525 /*
@@ -518,8 +529,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
518#ifdef CONFIG_X86_64 529#ifdef CONFIG_X86_64
519 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); 530 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
520#endif 531#endif
521 insn_init(&insn, addr, is64); 532 insn_init(&insn, addr, bytes_read, is64);
522 insn_get_opcode(&insn); 533 insn_get_opcode(&insn);
534 if (!insn.opcode.got)
535 return X86_BR_ABORT;
523 536
524 switch (insn.opcode.bytes[0]) { 537 switch (insn.opcode.bytes[0]) {
525 case 0xf: 538 case 0xf:
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 67e6d19ef1be..f7e3cd50ece0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr)
285 * normally used, we just go through if there is no kprobe. 285 * normally used, we just go through if there is no kprobe.
286 */ 286 */
287 __addr = recover_probed_instruction(buf, addr); 287 __addr = recover_probed_instruction(buf, addr);
288 kernel_insn_init(&insn, (void *)__addr); 288 kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
289 insn_get_length(&insn); 289 insn_get_length(&insn);
290 290
291 /* 291 /*
@@ -330,8 +330,10 @@ int __copy_instruction(u8 *dest, u8 *src)
330{ 330{
331 struct insn insn; 331 struct insn insn;
332 kprobe_opcode_t buf[MAX_INSN_SIZE]; 332 kprobe_opcode_t buf[MAX_INSN_SIZE];
333 unsigned long recovered_insn =
334 recover_probed_instruction(buf, (unsigned long)src);
333 335
334 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); 336 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
335 insn_get_length(&insn); 337 insn_get_length(&insn);
336 /* Another subsystem puts a breakpoint, failed to recover */ 338 /* Another subsystem puts a breakpoint, failed to recover */
337 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 339 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
@@ -342,7 +344,7 @@ int __copy_instruction(u8 *dest, u8 *src)
342 if (insn_rip_relative(&insn)) { 344 if (insn_rip_relative(&insn)) {
343 s64 newdisp; 345 s64 newdisp;
344 u8 *disp; 346 u8 *disp;
345 kernel_insn_init(&insn, dest); 347 kernel_insn_init(&insn, dest, insn.length);
346 insn_get_displacement(&insn); 348 insn_get_displacement(&insn);
347 /* 349 /*
348 * The copied instruction uses the %rip-relative addressing 350 * The copied instruction uses the %rip-relative addressing
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f1314d0bcf0a..7c523bbf3dc8 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -251,13 +251,15 @@ static int can_optimize(unsigned long paddr)
251 /* Decode instructions */ 251 /* Decode instructions */
252 addr = paddr - offset; 252 addr = paddr - offset;
253 while (addr < paddr - offset + size) { /* Decode until function end */ 253 while (addr < paddr - offset + size) { /* Decode until function end */
254 unsigned long recovered_insn;
254 if (search_exception_tables(addr)) 255 if (search_exception_tables(addr))
255 /* 256 /*
256 * Since some fixup code will jump into this function, 257 * Since some fixup code will jump into this function,
257 * we can't optimize kprobe in this function. 258 * we can't optimize kprobe in this function.
258 */ 259 */
259 return 0; 260 return 0;
260 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); 261 recovered_insn = recover_probed_instruction(buf, addr);
262 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
261 insn_get_length(&insn); 263 insn_get_length(&insn);
262 /* Another subsystem puts a breakpoint */ 264 /* Another subsystem puts a breakpoint */
263 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 265 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ab08aa2276fb..214245d6b996 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -960,6 +960,8 @@ void __init setup_arch(char **cmdline_p)
960 init_mm.end_data = (unsigned long) _edata; 960 init_mm.end_data = (unsigned long) _edata;
961 init_mm.brk = _brk_end; 961 init_mm.brk = _brk_end;
962 962
963 mpx_mm_init(&init_mm);
964
963 code_resource.start = __pa_symbol(_text); 965 code_resource.start = __pa_symbol(_text);
964 code_resource.end = __pa_symbol(_etext)-1; 966 code_resource.end = __pa_symbol(_etext)-1;
965 data_resource.start = __pa_symbol(_etext); 967 data_resource.start = __pa_symbol(_etext);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 07ab8e9733c5..a9ae20579895 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -60,6 +60,7 @@
60#include <asm/fixmap.h> 60#include <asm/fixmap.h>
61#include <asm/mach_traps.h> 61#include <asm/mach_traps.h>
62#include <asm/alternative.h> 62#include <asm/alternative.h>
63#include <asm/mpx.h>
63 64
64#ifdef CONFIG_X86_64 65#ifdef CONFIG_X86_64
65#include <asm/x86_init.h> 66#include <asm/x86_init.h>
@@ -228,7 +229,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
228 229
229DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) 230DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
230DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) 231DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
231DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
232DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) 232DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
233DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) 233DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
234DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) 234DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
@@ -286,6 +286,89 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
286} 286}
287#endif 287#endif
288 288
289dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
290{
291 struct task_struct *tsk = current;
292 struct xsave_struct *xsave_buf;
293 enum ctx_state prev_state;
294 struct bndcsr *bndcsr;
295 siginfo_t *info;
296
297 prev_state = exception_enter();
298 if (notify_die(DIE_TRAP, "bounds", regs, error_code,
299 X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
300 goto exit;
301 conditional_sti(regs);
302
303 if (!user_mode(regs))
304 die("bounds", regs, error_code);
305
306 if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
307 /* The exception is not from Intel MPX */
308 goto exit_trap;
309 }
310
311 /*
312 * We need to look at BNDSTATUS to resolve this exception.
313 * It is not directly accessible, though, so we need to
314 * do an xsave and then pull it out of the xsave buffer.
315 */
316 fpu_save_init(&tsk->thread.fpu);
317 xsave_buf = &(tsk->thread.fpu.state->xsave);
318 bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
319 if (!bndcsr)
320 goto exit_trap;
321
322 /*
323 * The error code field of the BNDSTATUS register communicates status
324 * information of a bound range exception #BR or operation involving
325 * bound directory.
326 */
327 switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
328 case 2: /* Bound directory has invalid entry. */
329 if (mpx_handle_bd_fault(xsave_buf))
330 goto exit_trap;
331 break; /* Success, it was handled */
332 case 1: /* Bound violation. */
333 info = mpx_generate_siginfo(regs, xsave_buf);
334 if (IS_ERR(info)) {
335 /*
336 * We failed to decode the MPX instruction. Act as if
337 * the exception was not caused by MPX.
338 */
339 goto exit_trap;
340 }
341 /*
342 * Success, we decoded the instruction and retrieved
343 * an 'info' containing the address being accessed
344 * which caused the exception. This information
345 * allows an application to possibly handle the
346 * #BR exception itself.
347 */
348 do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info);
349 kfree(info);
350 break;
351 case 0: /* No exception caused by Intel MPX operations. */
352 goto exit_trap;
353 default:
354 die("bounds", regs, error_code);
355 }
356
357exit:
358 exception_exit(prev_state);
359 return;
360exit_trap:
361 /*
362 * This path out is for all the cases where we could not
363 * handle the exception in some way (like allocating a
364 * table or telling userspace about it). We will also end
365 * up here if the kernel has MPX turned off at compile
366 * time.
367 */
368 do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
369 exception_exit(prev_state);
370}
371
289dotraplinkage void 372dotraplinkage void
290do_general_protection(struct pt_regs *regs, long error_code) 373do_general_protection(struct pt_regs *regs, long error_code)
291{ 374{
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5d1cbfe4ae58..8b96a947021f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -219,7 +219,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
219{ 219{
220 u32 volatile *good_insns; 220 u32 volatile *good_insns;
221 221
222 insn_init(insn, auprobe->insn, x86_64); 222 insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
223 /* has the side-effect of processing the entire instruction */ 223 /* has the side-effect of processing the entire instruction */
224 insn_get_length(insn); 224 insn_get_length(insn);
225 if (WARN_ON_ONCE(!insn_complete(insn))) 225 if (WARN_ON_ONCE(!insn_complete(insn)))
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 54fcffed28ed..2480978b31cc 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -28,7 +28,7 @@
28 28
29/* Verify next sizeof(t) bytes can be on the same instruction */ 29/* Verify next sizeof(t) bytes can be on the same instruction */
30#define validate_next(t, insn, n) \ 30#define validate_next(t, insn, n) \
31 ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE) 31 ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
32 32
33#define __get_next(t, insn) \ 33#define __get_next(t, insn) \
34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) 34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
@@ -50,10 +50,11 @@
50 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 50 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
51 * @x86_64: !0 for 64-bit kernel or 64-bit app 51 * @x86_64: !0 for 64-bit kernel or 64-bit app
52 */ 52 */
53void insn_init(struct insn *insn, const void *kaddr, int x86_64) 53void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
54{ 54{
55 memset(insn, 0, sizeof(*insn)); 55 memset(insn, 0, sizeof(*insn));
56 insn->kaddr = kaddr; 56 insn->kaddr = kaddr;
57 insn->end_kaddr = kaddr + buf_len;
57 insn->next_byte = kaddr; 58 insn->next_byte = kaddr;
58 insn->x86_64 = x86_64 ? 1 : 0; 59 insn->x86_64 = x86_64 ? 1 : 0;
59 insn->opnd_bytes = 4; 60 insn->opnd_bytes = 4;
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 6a19ad9f370d..ecfdc46a024a 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -30,3 +30,5 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o
30obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 30obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
31 31
32obj-$(CONFIG_MEMTEST) += memtest.o 32obj-$(CONFIG_MEMTEST) += memtest.o
33
34obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
new file mode 100644
index 000000000000..67ebf5751222
--- /dev/null
+++ b/arch/x86/mm/mpx.c
@@ -0,0 +1,928 @@
1/*
2 * mpx.c - Memory Protection eXtensions
3 *
4 * Copyright (c) 2014, Intel Corporation.
5 * Qiaowei Ren <qiaowei.ren@intel.com>
6 * Dave Hansen <dave.hansen@intel.com>
7 */
8#include <linux/kernel.h>
9#include <linux/slab.h>
10#include <linux/syscalls.h>
11#include <linux/sched/sysctl.h>
12
13#include <asm/i387.h>
14#include <asm/insn.h>
15#include <asm/mman.h>
16#include <asm/mmu_context.h>
17#include <asm/mpx.h>
18#include <asm/processor.h>
19#include <asm/fpu-internal.h>
20
21static const char *mpx_mapping_name(struct vm_area_struct *vma)
22{
23 return "[mpx]";
24}
25
26static struct vm_operations_struct mpx_vma_ops = {
27 .name = mpx_mapping_name,
28};
29
30static int is_mpx_vma(struct vm_area_struct *vma)
31{
32 return (vma->vm_ops == &mpx_vma_ops);
33}
34
35/*
36 * This is really a simplified "vm_mmap". It only handles MPX
37 * bounds tables (the bounds directory is user-allocated).
38 *
39 * Later on, we use the vma->vm_ops to uniquely identify these
40 * VMAs.
41 */
42static unsigned long mpx_mmap(unsigned long len)
43{
44 unsigned long ret;
45 unsigned long addr, pgoff;
46 struct mm_struct *mm = current->mm;
47 vm_flags_t vm_flags;
48 struct vm_area_struct *vma;
49
50 /* Only bounds table and bounds directory can be allocated here */
51 if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
52 return -EINVAL;
53
54 down_write(&mm->mmap_sem);
55
56 /* Too many mappings? */
57 if (mm->map_count > sysctl_max_map_count) {
58 ret = -ENOMEM;
59 goto out;
60 }
61
62 /* Obtain the address to map to. We verify (or select) it and ensure
63 * that it represents a valid section of the address space.
64 */
65 addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
66 if (addr & ~PAGE_MASK) {
67 ret = addr;
68 goto out;
69 }
70
71 vm_flags = VM_READ | VM_WRITE | VM_MPX |
72 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
73
74 /* Set pgoff according to addr for anon_vma */
75 pgoff = addr >> PAGE_SHIFT;
76
77 ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
78 if (IS_ERR_VALUE(ret))
79 goto out;
80
81 vma = find_vma(mm, ret);
82 if (!vma) {
83 ret = -ENOMEM;
84 goto out;
85 }
86 vma->vm_ops = &mpx_vma_ops;
87
88 if (vm_flags & VM_LOCKED) {
89 up_write(&mm->mmap_sem);
90 mm_populate(ret, len);
91 return ret;
92 }
93
94out:
95 up_write(&mm->mmap_sem);
96 return ret;
97}
98
99enum reg_type {
100 REG_TYPE_RM = 0,
101 REG_TYPE_INDEX,
102 REG_TYPE_BASE,
103};
104
105static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
106 enum reg_type type)
107{
108 int regno = 0;
109
110 static const int regoff[] = {
111 offsetof(struct pt_regs, ax),
112 offsetof(struct pt_regs, cx),
113 offsetof(struct pt_regs, dx),
114 offsetof(struct pt_regs, bx),
115 offsetof(struct pt_regs, sp),
116 offsetof(struct pt_regs, bp),
117 offsetof(struct pt_regs, si),
118 offsetof(struct pt_regs, di),
119#ifdef CONFIG_X86_64
120 offsetof(struct pt_regs, r8),
121 offsetof(struct pt_regs, r9),
122 offsetof(struct pt_regs, r10),
123 offsetof(struct pt_regs, r11),
124 offsetof(struct pt_regs, r12),
125 offsetof(struct pt_regs, r13),
126 offsetof(struct pt_regs, r14),
127 offsetof(struct pt_regs, r15),
128#endif
129 };
130 int nr_registers = ARRAY_SIZE(regoff);
131 /*
132 * Don't possibly decode a 32-bit instruction as
133 * reading a 64-bit-only register.
134 */
135 if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
136 nr_registers -= 8;
137
138 switch (type) {
139 case REG_TYPE_RM:
140 regno = X86_MODRM_RM(insn->modrm.value);
141 if (X86_REX_B(insn->rex_prefix.value) == 1)
142 regno += 8;
143 break;
144
145 case REG_TYPE_INDEX:
146 regno = X86_SIB_INDEX(insn->sib.value);
147 if (X86_REX_X(insn->rex_prefix.value) == 1)
148 regno += 8;
149 break;
150
151 case REG_TYPE_BASE:
152 regno = X86_SIB_BASE(insn->sib.value);
153 if (X86_REX_B(insn->rex_prefix.value) == 1)
154 regno += 8;
155 break;
156
157 default:
158 pr_err("invalid register type");
159 BUG();
160 break;
161 }
162
163 if (regno >= nr_registers) {
164 WARN_ONCE(1, "decoded an instruction with an invalid register");
165 return -EINVAL;
166 }
167 return regoff[regno];
168}
169
170/*
171 * Return the address being referenced by the instruction.
172 * For rm==3, return the content of the rm reg.
173 * For rm!=3, calculate the address using SIB and Disp.
174 */
175static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs)
176{
177 unsigned long addr, base, indx;
178 int addr_offset, base_offset, indx_offset;
179 insn_byte_t sib;
180
181 insn_get_modrm(insn);
182 insn_get_sib(insn);
183 sib = insn->sib.value;
184
185 if (X86_MODRM_MOD(insn->modrm.value) == 3) {
186 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
187 if (addr_offset < 0)
188 goto out_err;
189 addr = regs_get_register(regs, addr_offset);
190 } else {
191 if (insn->sib.nbytes) {
192 base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
193 if (base_offset < 0)
194 goto out_err;
195
196 indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
197 if (indx_offset < 0)
198 goto out_err;
199
200 base = regs_get_register(regs, base_offset);
201 indx = regs_get_register(regs, indx_offset);
202 addr = base + indx * (1 << X86_SIB_SCALE(sib));
203 } else {
204 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
205 if (addr_offset < 0)
206 goto out_err;
207 addr = regs_get_register(regs, addr_offset);
208 }
209 addr += insn->displacement.value;
210 }
211 return (void __user *)addr;
212out_err:
213 return (void __user *)-1;
214}
215
216static int mpx_insn_decode(struct insn *insn,
217 struct pt_regs *regs)
218{
219 unsigned char buf[MAX_INSN_SIZE];
220 int x86_64 = !test_thread_flag(TIF_IA32);
221 int not_copied;
222 int nr_copied;
223
224 not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
225 nr_copied = sizeof(buf) - not_copied;
226 /*
227 * The decoder _should_ fail nicely if we pass it a short buffer.
228 * But, let's not depend on that implementation detail. If we
229 * did not get anything, just error out now.
230 */
231 if (!nr_copied)
232 return -EFAULT;
233 insn_init(insn, buf, nr_copied, x86_64);
234 insn_get_length(insn);
235 /*
236 * copy_from_user() tries to get as many bytes as we could see in
237 * the largest possible instruction. If the instruction we are
238 * after is shorter than that _and_ we attempt to copy from
239 * something unreadable, we might get a short read. This is OK
240 * as long as the read did not stop in the middle of the
241 * instruction. Check to see if we got a partial instruction.
242 */
243 if (nr_copied < insn->length)
244 return -EFAULT;
245
246 insn_get_opcode(insn);
247 /*
248 * We only _really_ need to decode bndcl/bndcn/bndcu
249 * Error out on anything else.
250 */
251 if (insn->opcode.bytes[0] != 0x0f)
252 goto bad_opcode;
253 if ((insn->opcode.bytes[1] != 0x1a) &&
254 (insn->opcode.bytes[1] != 0x1b))
255 goto bad_opcode;
256
257 return 0;
258bad_opcode:
259 return -EINVAL;
260}
261
262/*
263 * If a bounds overflow occurs then a #BR is generated. This
264 * function decodes the MPX instruction to get the violation address
265 * and sets that address in the extended struct siginfo.
266 *
267 * Note that this is not a super precise way of doing this.
268 * Userspace could have, by the time we get here, written
269 * anything it wants into the instruction. We can not
270 * trust anything about it. It might not be a valid
271 * instruction or might encode invalid registers, etc...
272 *
273 * The caller is expected to kfree() the returned siginfo_t.
274 */
275siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
276 struct xsave_struct *xsave_buf)
277{
278 struct bndreg *bndregs, *bndreg;
279 siginfo_t *info = NULL;
280 struct insn insn;
281 uint8_t bndregno;
282 int err;
283
284 err = mpx_insn_decode(&insn, regs);
285 if (err)
286 goto err_out;
287
288 /*
289 * We know at this point that we are only dealing with
290 * MPX instructions.
291 */
292 insn_get_modrm(&insn);
293 bndregno = X86_MODRM_REG(insn.modrm.value);
294 if (bndregno > 3) {
295 err = -EINVAL;
296 goto err_out;
297 }
298 /* get the bndregs _area_ of the xsave structure */
299 bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS);
300 if (!bndregs) {
301 err = -EINVAL;
302 goto err_out;
303 }
304 /* now go select the individual register in the set of 4 */
305 bndreg = &bndregs[bndregno];
306
307 info = kzalloc(sizeof(*info), GFP_KERNEL);
308 if (!info) {
309 err = -ENOMEM;
310 goto err_out;
311 }
312 /*
313 * The registers are always 64-bit, but the upper 32
314 * bits are ignored in 32-bit mode. Also, note that the
315 * upper bounds are architecturally represented in 1's
316 * complement form.
317 *
318 * The 'unsigned long' cast is because the compiler
319 * complains when casting from integers to different-size
320 * pointers.
321 */
322 info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound;
323 info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound;
324 info->si_addr_lsb = 0;
325 info->si_signo = SIGSEGV;
326 info->si_errno = 0;
327 info->si_code = SEGV_BNDERR;
328 info->si_addr = mpx_get_addr_ref(&insn, regs);
329 /*
330 * We were not able to extract an address from the instruction,
331 * probably because there was something invalid in it.
332 */
333 if (info->si_addr == (void *)-1) {
334 err = -EINVAL;
335 goto err_out;
336 }
337 return info;
338err_out:
339 /* info might be NULL, but kfree() handles that */
340 kfree(info);
341 return ERR_PTR(err);
342}
343
344static __user void *task_get_bounds_dir(struct task_struct *tsk)
345{
346 struct bndcsr *bndcsr;
347
348 if (!cpu_feature_enabled(X86_FEATURE_MPX))
349 return MPX_INVALID_BOUNDS_DIR;
350
351 /*
352 * The bounds directory pointer is stored in a register
353 * only accessible if we first do an xsave.
354 */
355 fpu_save_init(&tsk->thread.fpu);
356 bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR);
357 if (!bndcsr)
358 return MPX_INVALID_BOUNDS_DIR;
359
360 /*
361 * Make sure the register looks valid by checking the
362 * enable bit.
363 */
364 if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
365 return MPX_INVALID_BOUNDS_DIR;
366
367 /*
368 * Lastly, mask off the low bits used for configuration
369 * flags, and return the address of the bounds directory.
370 */
371 return (void __user *)(unsigned long)
372 (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
373}
374
375int mpx_enable_management(struct task_struct *tsk)
376{
377 void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
378 struct mm_struct *mm = tsk->mm;
379 int ret = 0;
380
381 /*
382 * The runtime in userspace is responsible for allocating the
383 * bounds directory. Then, it will save the base of the bounds
384 * directory into the XSAVE/XRSTOR save area and enable MPX
385 * through the XRSTOR instruction.
386 *
387 * fpu_xsave() is expected to be very expensive. Storing the bounds
388 * directory here means that we do not have to do xsave in the unmap
389 * path; we can just use mm->bd_addr instead.
390 */
391 bd_base = task_get_bounds_dir(tsk);
392 down_write(&mm->mmap_sem);
393 mm->bd_addr = bd_base;
394 if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
395 ret = -ENXIO;
396
397 up_write(&mm->mmap_sem);
398 return ret;
399}
400
401int mpx_disable_management(struct task_struct *tsk)
402{
403 struct mm_struct *mm = current->mm;
404
405 if (!cpu_feature_enabled(X86_FEATURE_MPX))
406 return -ENXIO;
407
408 down_write(&mm->mmap_sem);
409 mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
410 up_write(&mm->mmap_sem);
411 return 0;
412}
413
414/*
415 * With 32-bit mode, MPX_BD_SIZE_BYTES is 4MB, and the size of each
416 * bounds table is 16KB. With 64-bit mode, MPX_BD_SIZE_BYTES is 2GB,
417 * and the size of each bounds table is 4MB.
418 */
419static int allocate_bt(long __user *bd_entry)
420{
421 unsigned long expected_old_val = 0;
422 unsigned long actual_old_val = 0;
423 unsigned long bt_addr;
424 int ret = 0;
425
426 /*
427 * Carve the virtual space out of userspace for the new
428 * bounds table:
429 */
430 bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES);
431 if (IS_ERR((void *)bt_addr))
432 return PTR_ERR((void *)bt_addr);
433 /*
434 * Set the valid flag (kinda like _PAGE_PRESENT in a pte)
435 */
436 bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
437
438 /*
439 * Go poke the address of the new bounds table in to the
440 * bounds directory entry out in userspace memory. Note:
441 * we may race with another CPU instantiating the same table.
442 * In that case the cmpxchg will see an unexpected
443 * 'actual_old_val'.
444 *
445 * This can fault, but that's OK because we do not hold
446	 * mmap_sem at this point, unlike some of the other parts
447	 * of the MPX code that have to pagefault_disable().
448 */
449 ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry,
450 expected_old_val, bt_addr);
451 if (ret)
452 goto out_unmap;
453
454 /*
455 * The user_atomic_cmpxchg_inatomic() will only return nonzero
456 * for faults, *not* if the cmpxchg itself fails. Now we must
457 * verify that the cmpxchg itself completed successfully.
458 */
459 /*
460	 * We expected the entry to be empty ('expected_old_val' == 0),
461	 * but instead found an apparently valid entry. Assume we raced
462	 * with another thread to instantiate this table; declare success.
463 */
464 if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
465 ret = 0;
466 goto out_unmap;
467 }
468 /*
469 * We found a non-empty bd_entry but it did not have the
470 * VALID_FLAG set. Return an error which will result in
471 * a SEGV since this probably means that somebody scribbled
472 * some invalid data in to a bounds table.
473 */
474 if (expected_old_val != actual_old_val) {
475 ret = -EINVAL;
476 goto out_unmap;
477 }
478 return 0;
479out_unmap:
480 vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES);
481 return ret;
482}
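
[The install-or-lose-the-race pattern above generalizes beyond MPX. A
self-contained analogue using C11 atomics on ordinary memory; the kernel
does the same thing against a user page with
user_atomic_cmpxchg_inatomic(), which can additionally fault:]

	#include <stdatomic.h>
	#include <stdint.h>

	#define ENTRY_VALID_FLAG	0x1UL

	/*
	 * Returns 0 if a table ends up installed (by us or by a racing
	 * thread), -1 if the slot held garbage.
	 */
	static int install_table(_Atomic uintptr_t *slot, uintptr_t new_table)
	{
		uintptr_t expected = 0;

		if (atomic_compare_exchange_strong(slot, &expected,
						   new_table | ENTRY_VALID_FLAG))
			return 0;	/* slot was empty: we installed it */
		if (expected & ENTRY_VALID_FLAG)
			return 0;	/* lost the race: someone else did */
		return -1;		/* non-empty but not valid: corrupt */
	}
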
483
484/*
485 * When a BNDSTX instruction attempts to save bounds to a bounds
486 * table, it will first attempt to look up the table in the
487 * first-level bounds directory. If it does not find a table in
488 * the directory, a #BR is generated and we get here in order to
489 * allocate a new table.
490 *
491 * In 32-bit mode, the BD is 4MB and each bounds table is 16KB.
492 * In 64-bit mode, the BD is 2GB and each bounds table is 4MB.
493 * (The arithmetic behind these sizes is worked out below.)
494 */
495static int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
496{
497 unsigned long bd_entry, bd_base;
498 struct bndcsr *bndcsr;
499
500 bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
501 if (!bndcsr)
502 return -EINVAL;
503 /*
504 * Mask off the preserve and enable bits
505 */
506 bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
507 /*
508 * The hardware provides the address of the missing or invalid
509 * entry via BNDSTATUS, so we don't have to go look it up.
510 */
511 bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
512 /*
513 * Make sure the directory entry is within where we think
514 * the directory is.
515 */
516 if ((bd_entry < bd_base) ||
517 (bd_entry >= bd_base + MPX_BD_SIZE_BYTES))
518 return -EINVAL;
519
520 return allocate_bt((long __user *)bd_entry);
521}
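
[For the record, the sizes quoted in these comments follow directly from
the MPX layout: in 64-bit mode the directory holds 2^28 eight-byte entries
(2^28 * 8 = 2GB) and each bounds table holds 2^17 thirty-two-byte entries
(2^17 * 32 = 4MB); in 32-bit mode the directory holds 2^20 four-byte
entries (4MB) and each table 2^10 sixteen-byte entries (16KB).]
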
522
523int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
524{
525 /*
526 * Userspace never asked us to manage the bounds tables,
527 * so refuse to help.
528 */
529 if (!kernel_managing_mpx_tables(current->mm))
530 return -EINVAL;
531
532 if (do_mpx_bt_fault(xsave_buf)) {
533 force_sig(SIGSEGV, current);
534 /*
535 * The force_sig() is essentially "handling" this
536 * exception, so we do not pass up the error
537 * from do_mpx_bt_fault().
538 */
539 }
540 return 0;
541}
542
543/*
544 * A thin wrapper around get_user_pages(). Returns 0 if the
545 * fault was resolved or -errno if not.
546 */
547static int mpx_resolve_fault(long __user *addr, int write)
548{
549 long gup_ret;
550 int nr_pages = 1;
551 int force = 0;
552
553 gup_ret = get_user_pages(current, current->mm, (unsigned long)addr,
554 nr_pages, write, force, NULL, NULL);
555 /*
556	 * get_user_pages() returns the number of pages gotten.
557 * 0 means we failed to fault in and get anything,
558 * probably because 'addr' is bad.
559 */
560 if (!gup_ret)
561 return -EFAULT;
562 /* Other error, return it */
563 if (gup_ret < 0)
564 return gup_ret;
565 /* must have gup'd a page and gup_ret>0, success */
566 return 0;
567}
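
[The helpers below both use this wrapper in the same idiom: attempt the
user access with page faults disabled, and if it faults, fault the page in
explicitly and retry. Distilled into one hedged sketch, where 'access'
stands in for get_user() or the user cmpxchg:]

	static int user_access_retry(int (*access)(long __user *),
				     long __user *uaddr, int need_write)
	{
		int ret;

		for (;;) {
			pagefault_disable();
			ret = access(uaddr);
			pagefault_enable();
			if (!ret)
				return 0;	/* access succeeded */
			if (ret == -EFAULT)
				ret = mpx_resolve_fault(uaddr, need_write);
			if (ret)
				return ret;	/* could not fault the page in */
		}
	}
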
568
569/*
570 * Get the base of the bounds table pointed to by a specific
571 * bounds directory entry.
572 */
573static int get_bt_addr(struct mm_struct *mm,
574 long __user *bd_entry, unsigned long *bt_addr)
575{
576 int ret;
577 int valid_bit;
578
579 if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry)))
580 return -EFAULT;
581
582 while (1) {
583 int need_write = 0;
584
585 pagefault_disable();
586 ret = get_user(*bt_addr, bd_entry);
587 pagefault_enable();
588 if (!ret)
589 break;
590 if (ret == -EFAULT)
591 ret = mpx_resolve_fault(bd_entry, need_write);
592 /*
593 * If we could not resolve the fault, consider it
594 * userspace's fault and error out.
595 */
596 if (ret)
597 return ret;
598 }
599
600 valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG;
601 *bt_addr &= MPX_BT_ADDR_MASK;
602
603 /*
604 * When the kernel is managing bounds tables, a bounds directory
605 * entry will either have a valid address (plus the valid bit)
606 * *OR* be completely empty. If we see a !valid entry *and* some
607 * data in the address field, we know something is wrong. This
608 * -EINVAL return will cause a SIGSEGV.
609 */
610 if (!valid_bit && *bt_addr)
611 return -EINVAL;
612 /*
613	 * Do we have a completely zeroed bd entry? That is OK. It
614 * just means there was no bounds table for this memory. Make
615 * sure to distinguish this from -EINVAL, which will cause
616 * a SEGV.
617 */
618 if (!valid_bit)
619 return -ENOENT;
620
621 return 0;
622}
623
624/*
625 * Free the backing physical pages of bounds table 'bt_addr'.
626 * Assume start...end is within that bounds table.
627 */
628static int zap_bt_entries(struct mm_struct *mm,
629 unsigned long bt_addr,
630 unsigned long start, unsigned long end)
631{
632 struct vm_area_struct *vma;
633 unsigned long addr, len;
634
635 /*
636 * Find the first overlapping vma. If vma->vm_start > start, there
637 * will be a hole in the bounds table. This -EINVAL return will
638 * cause a SIGSEGV.
639 */
640 vma = find_vma(mm, start);
641 if (!vma || vma->vm_start > start)
642 return -EINVAL;
643
644 /*
645	 * A NUMA policy on a VM_MPX VMA could cause this bounds table to
646 * be split. So we need to look across the entire 'start -> end'
647 * range of this bounds table, find all of the VM_MPX VMAs, and
648 * zap only those.
649 */
650 addr = start;
651 while (vma && vma->vm_start < end) {
652 /*
653 * We followed a bounds directory entry down
654 * here. If we find a non-MPX VMA, that's bad,
655 * so stop immediately and return an error. This
656 * probably results in a SIGSEGV.
657 */
658 if (!is_mpx_vma(vma))
659 return -EINVAL;
660
661 len = min(vma->vm_end, end) - addr;
662 zap_page_range(vma, addr, len, NULL);
663
664 vma = vma->vm_next;
665		addr = vma ? vma->vm_start : end;	/* vma may be NULL here */
666 }
667
668 return 0;
669}
670
671static int unmap_single_bt(struct mm_struct *mm,
672 long __user *bd_entry, unsigned long bt_addr)
673{
674 unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
675 unsigned long actual_old_val = 0;
676 int ret;
677
678 while (1) {
679 int need_write = 1;
680
681 pagefault_disable();
682 ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry,
683 expected_old_val, 0);
684 pagefault_enable();
685 if (!ret)
686 break;
687 if (ret == -EFAULT)
688 ret = mpx_resolve_fault(bd_entry, need_write);
689 /*
690 * If we could not resolve the fault, consider it
691 * userspace's fault and error out.
692 */
693 if (ret)
694 return ret;
695 }
696 /*
697 * The cmpxchg was performed, check the results.
698 */
699 if (actual_old_val != expected_old_val) {
700 /*
701 * Someone else raced with us to unmap the table.
702 * There was no bounds table pointed to by the
703 * directory, so declare success. Somebody freed
704 * it.
705 */
706 if (!actual_old_val)
707 return 0;
708 /*
709 * Something messed with the bounds directory
710 * entry. We hold mmap_sem for read or write
711 * here, so it could not be a _new_ bounds table
712 * that someone just allocated. Something is
713 * wrong, so pass up the error and SIGSEGV.
714 */
715 return -EINVAL;
716 }
717
718 /*
719	 * Note: we are likely being called under do_munmap() already. To
720	 * avoid recursion, do_munmap() checks (via the VM_MPX flag)
721	 * whether the area being unmapped is itself a bounds table.
722 */
723 return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES);
724}
725
726/*
727 * If the bounds table pointed to by bounds directory entry
728 * 'bd_entry' is not shared, unmap the whole bounds table.
729 * Otherwise, free only the backing physical pages of the bounds
730 * table entries covered by the virtual address region start...end.
731 */
732static int unmap_shared_bt(struct mm_struct *mm,
733 long __user *bd_entry, unsigned long start,
734 unsigned long end, bool prev_shared, bool next_shared)
735{
736 unsigned long bt_addr;
737 int ret;
738
739 ret = get_bt_addr(mm, bd_entry, &bt_addr);
740 /*
741 * We could see an "error" ret for not-present bounds
742 * tables (not really an error), or actual errors, but
743 * stop unmapping either way.
744 */
745 if (ret)
746 return ret;
747
748 if (prev_shared && next_shared)
749 ret = zap_bt_entries(mm, bt_addr,
750 bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
751 bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
752 else if (prev_shared)
753 ret = zap_bt_entries(mm, bt_addr,
754 bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
755 bt_addr+MPX_BT_SIZE_BYTES);
756 else if (next_shared)
757 ret = zap_bt_entries(mm, bt_addr, bt_addr,
758 bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
759 else
760 ret = unmap_single_bt(mm, bd_entry, bt_addr);
761
762 return ret;
763}
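
[MPX_GET_BD_ENTRY_OFFSET() and MPX_GET_BT_ENTRY_OFFSET() come from
asm/mpx.h elsewhere in this patch. A sketch of the 64-bit arithmetic they
are expected to perform, assuming the documented layout: bits 47:20 of a
pointer select the 8-byte directory entry, bits 19:3 select the 32-byte
table entry:]

	/* 64-bit sketch only; the real macros live in asm/mpx.h */
	#define SKETCH_BD_ENTRY_OFFSET(addr) \
		((((addr) >> 20) & ((1UL << 28) - 1)) << 3)	/* 8-byte entries */
	#define SKETCH_BT_ENTRY_OFFSET(addr) \
		((((addr) >>  3) & ((1UL << 17) - 1)) << 5)	/* 32-byte entries */
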
764
765/*
766 * A virtual address region being munmap()ed might share a bounds
767 * table with adjacent VMAs. We only need to free the backing
768 * physical memory of the shared bounds table entries covered by
769 * this virtual address region.
770 */
771static int unmap_edge_bts(struct mm_struct *mm,
772 unsigned long start, unsigned long end)
773{
774 int ret;
775 long __user *bde_start, *bde_end;
776 struct vm_area_struct *prev, *next;
777 bool prev_shared = false, next_shared = false;
778
779 bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
780 bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
781
782 /*
783 * Check whether bde_start and bde_end are shared with adjacent
784 * VMAs.
785 *
786	 * We already unlinked the VMAs from the mm's rbtree so 'start'
787	 * is guaranteed to be in a hole. This gets us the first VMA
788	 * before the hole into 'prev' and the next VMA after the hole
789	 * into 'next'.
790 */
791 next = find_vma_prev(mm, start, &prev);
792 if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1))
793 == bde_start)
794 prev_shared = true;
795 if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start))
796 == bde_end)
797 next_shared = true;
798
799 /*
800 * This virtual address region being munmap()ed is only
801 * covered by one bounds table.
802 *
803 * In this case, if this table is also shared with adjacent
804	 * VMAs, only part of the backing physical memory of the bounds
805	 * table needs to be freed. Otherwise the whole bounds table
806	 * needs to be unmapped.
807 */
808 if (bde_start == bde_end) {
809 return unmap_shared_bt(mm, bde_start, start, end,
810 prev_shared, next_shared);
811 }
812
813 /*
814	 * If more than one bounds table is covered by the virtual
815	 * address region being munmap()ed, we need to separately check
816 * whether bde_start and bde_end are shared with adjacent VMAs.
817 */
818 ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false);
819 if (ret)
820 return ret;
821 ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared);
822 if (ret)
823 return ret;
824
825 return 0;
826}
827
828static int mpx_unmap_tables(struct mm_struct *mm,
829 unsigned long start, unsigned long end)
830{
831 int ret;
832 long __user *bd_entry, *bde_start, *bde_end;
833 unsigned long bt_addr;
834
835 /*
836 * "Edge" bounds tables are those which are being used by the region
837 * (start -> end), but that may be shared with adjacent areas. If they
838 * turn out to be completely unshared, they will be freed. If they are
839 * shared, we will free the backing store (like an MADV_DONTNEED) for
840 * areas used by this region.
841 */
842 ret = unmap_edge_bts(mm, start, end);
843 switch (ret) {
844 /* non-present tables are OK */
845 case 0:
846 case -ENOENT:
847 /* Success, or no tables to unmap */
848 break;
849 case -EINVAL:
850 case -EFAULT:
851 default:
852 return ret;
853 }
854
855 /*
856	 * Only unmap the bounds tables that are
857	 * 1. fully covered
858	 * 2. not at the edges of the mapping, even if fully aligned
859 */
860 bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
861 bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
862 for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) {
863 ret = get_bt_addr(mm, bd_entry, &bt_addr);
864 switch (ret) {
865 case 0:
866 break;
867 case -ENOENT:
868 /* No table here, try the next one */
869 continue;
870 case -EINVAL:
871 case -EFAULT:
872 default:
873 /*
874 * Note: we are being strict here.
875			 * Any time we run into an issue
876 * unmapping tables, we stop and
877 * SIGSEGV.
878 */
879 return ret;
880 }
881
882 ret = unmap_single_bt(mm, bd_entry, bt_addr);
883 if (ret)
884 return ret;
885 }
886
887 return 0;
888}
889
890/*
891 * Free unused bounds tables covered in a virtual address region being
892 * munmap()ed. Assume end > start.
893 *
894 * This function will be called by do_munmap(), and the VMAs covering
895 * the virtual address region start...end have already been split if
896 * necessary, and the 'vma' is the first vma in this range (start -> end).
897 */
898void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
899 unsigned long start, unsigned long end)
900{
901 int ret;
902
903 /*
904 * Refuse to do anything unless userspace has asked
905 * the kernel to help manage the bounds tables,
906 */
907 if (!kernel_managing_mpx_tables(current->mm))
908 return;
909 /*
910 * This will look across the entire 'start -> end' range,
911 * and find all of the non-VM_MPX VMAs.
912 *
913 * To avoid recursion, if a VM_MPX vma is found in the range
914	 * (start->end), we skip the follow-up work. This
915 * recursion represents having bounds tables for bounds tables,
916 * which should not occur normally. Being strict about it here
917 * helps ensure that we do not have an exploitable stack overflow.
918 */
919 do {
920 if (vma->vm_flags & VM_MPX)
921 return;
922 vma = vma->vm_next;
923 } while (vma && vma->vm_start < end);
924
925 ret = mpx_unmap_tables(mm, start, end);
926 if (ret)
927 force_sig(SIGSEGV, current);
928}
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 872eb60e7806..ba70ff232917 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -254,7 +254,7 @@ int main(int argc, char **argv)
254 continue; 254 continue;
255 255
256 /* Decode an instruction */ 256 /* Decode an instruction */
257 insn_init(&insn, insn_buf, x86_64); 257 insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
258 insn_get_length(&insn); 258 insn_get_length(&insn);
259 259
260 if (insn.next_byte <= insn.kaddr || 260 if (insn.next_byte <= insn.kaddr ||
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index 13403fc95a96..56f04db0c9c0 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -149,7 +149,7 @@ int main(int argc, char **argv)
149 break; 149 break;
150 } 150 }
151 /* Decode an instruction */ 151 /* Decode an instruction */
152 insn_init(&insn, insn_buf, x86_64); 152 insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
153 insn_get_length(&insn); 153 insn_get_length(&insn);
154 if (insn.length != nb) { 154 if (insn.length != nb) {
155 warnings++; 155 warnings++;
diff --git a/fs/exec.c b/fs/exec.c
index 7302b75a9820..01aebe300200 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
277 goto err; 277 goto err;
278 278
279 mm->stack_vm = mm->total_vm = 1; 279 mm->stack_vm = mm->total_vm = 1;
280 arch_bprm_mm_init(mm, vma);
280 up_write(&mm->mmap_sem); 281 up_write(&mm->mmap_sem);
281 bprm->p = vma->vm_end - sizeof(void *); 282 bprm->p = vma->vm_end - sizeof(void *);
282 return 0; 283 return 0;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0388cffe3d..f6734c6b66a6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -552,6 +552,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
552 [ilog2(VM_GROWSDOWN)] = "gd", 552 [ilog2(VM_GROWSDOWN)] = "gd",
553 [ilog2(VM_PFNMAP)] = "pf", 553 [ilog2(VM_PFNMAP)] = "pf",
554 [ilog2(VM_DENYWRITE)] = "dw", 554 [ilog2(VM_DENYWRITE)] = "dw",
555#ifdef CONFIG_X86_INTEL_MPX
556 [ilog2(VM_MPX)] = "mp",
557#endif
555 [ilog2(VM_LOCKED)] = "lo", 558 [ilog2(VM_LOCKED)] = "lo",
556 [ilog2(VM_IO)] = "io", 559 [ilog2(VM_IO)] = "io",
557 [ilog2(VM_SEQ_READ)] = "sr", 560 [ilog2(VM_SEQ_READ)] = "sr",
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index 67dea8123683..866aa461efa5 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to 2 * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap
3 * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't 3 * and arch_unmap to be included in asm-FOO/mmu_context.h for any
4 * need to hook these. 4 * arch FOO which doesn't need to hook these.
5 */ 5 */
6#ifndef _ASM_GENERIC_MM_HOOKS_H 6#ifndef _ASM_GENERIC_MM_HOOKS_H
7#define _ASM_GENERIC_MM_HOOKS_H 7#define _ASM_GENERIC_MM_HOOKS_H
@@ -15,4 +15,15 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
15{ 15{
16} 16}
17 17
18static inline void arch_unmap(struct mm_struct *mm,
19 struct vm_area_struct *vma,
20 unsigned long start, unsigned long end)
21{
22}
23
24static inline void arch_bprm_mm_init(struct mm_struct *mm,
25 struct vm_area_struct *vma)
26{
27}
28
18#endif /* _ASM_GENERIC_MM_HOOKS_H */ 29#endif /* _ASM_GENERIC_MM_HOOKS_H */
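
[These generic no-ops are what every architecture other than x86 picks up.
The x86 versions (arch/x86/include/asm/mmu_context.h in the diffstat
above) override them to route into MPX; roughly, as a sketch:]

	static inline void arch_unmap(struct mm_struct *mm,
				      struct vm_area_struct *vma,
				      unsigned long start, unsigned long end)
	{
		mpx_notify_unmap(mm, vma, start, end);
	}

	static inline void arch_bprm_mm_init(struct mm_struct *mm,
					     struct vm_area_struct *vma)
	{
		mpx_mm_init(mm);	/* resets mm->bd_addr for the new image */
	}
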
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b46461116cd2..f7606d3a0915 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -128,6 +128,7 @@ extern unsigned int kobjsize(const void *objp);
128#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 128#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
129#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ 129#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
130#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ 130#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
131#define VM_ARCH_2 0x02000000
131#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ 132#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
132 133
133#ifdef CONFIG_MEM_SOFT_DIRTY 134#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -155,6 +156,11 @@ extern unsigned int kobjsize(const void *objp);
155# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ 156# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */
156#endif 157#endif
157 158
159#if defined(CONFIG_X86)
160/* MPX specific bounds table or bounds directory */
161# define VM_MPX VM_ARCH_2
162#endif
163
158#ifndef VM_GROWSUP 164#ifndef VM_GROWSUP
159# define VM_GROWSUP VM_NONE 165# define VM_GROWSUP VM_NONE
160#endif 166#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e0b286649f1..004e9d17b47e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -454,6 +454,10 @@ struct mm_struct {
454 bool tlb_flush_pending; 454 bool tlb_flush_pending;
455#endif 455#endif
456 struct uprobes_state uprobes_state; 456 struct uprobes_state uprobes_state;
457#ifdef CONFIG_X86_INTEL_MPX
458 /* address of the bounds directory */
459 void __user *bd_addr;
460#endif
457}; 461};
458 462
459static inline void mm_init_cpumask(struct mm_struct *mm) 463static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index ba5be7fdbdfe..1e3552037a5a 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -91,6 +91,10 @@ typedef struct siginfo {
91 int _trapno; /* TRAP # which caused the signal */ 91 int _trapno; /* TRAP # which caused the signal */
92#endif 92#endif
93 short _addr_lsb; /* LSB of the reported address */ 93 short _addr_lsb; /* LSB of the reported address */
94 struct {
95 void __user *_lower;
96 void __user *_upper;
97 } _addr_bnd;
94 } _sigfault; 98 } _sigfault;
95 99
96 /* SIGPOLL */ 100 /* SIGPOLL */
@@ -131,6 +135,8 @@ typedef struct siginfo {
131#define si_trapno _sifields._sigfault._trapno 135#define si_trapno _sifields._sigfault._trapno
132#endif 136#endif
133#define si_addr_lsb _sifields._sigfault._addr_lsb 137#define si_addr_lsb _sifields._sigfault._addr_lsb
138#define si_lower _sifields._sigfault._addr_bnd._lower
139#define si_upper _sifields._sigfault._addr_bnd._upper
134#define si_band _sifields._sigpoll._band 140#define si_band _sifields._sigpoll._band
135#define si_fd _sifields._sigpoll._fd 141#define si_fd _sifields._sigpoll._fd
136#ifdef __ARCH_SIGSYS 142#ifdef __ARCH_SIGSYS
@@ -199,7 +205,8 @@ typedef struct siginfo {
199 */ 205 */
200#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ 206#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */
201#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ 207#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */
202#define NSIGSEGV 2 208#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */
209#define NSIGSEGV 3
203 210
204/* 211/*
205 * SIGBUS si_codes 212 * SIGBUS si_codes
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 513df75d0fc9..89f63503f903 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -179,4 +179,10 @@ struct prctl_mm_map {
179#define PR_SET_THP_DISABLE 41 179#define PR_SET_THP_DISABLE 41
180#define PR_GET_THP_DISABLE 42 180#define PR_GET_THP_DISABLE 42
181 181
182/*
183 * Tell the kernel to start/stop helping userspace manage bounds tables.
184 */
185#define PR_MPX_ENABLE_MANAGEMENT 43
186#define PR_MPX_DISABLE_MANAGEMENT 44
187
182#endif /* _LINUX_PRCTL_H */ 188#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 19e35135fc60..16a305295256 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2756,6 +2756,10 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
2756 if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) 2756 if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
2757 err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); 2757 err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
2758#endif 2758#endif
2759#ifdef SEGV_BNDERR
2760 err |= __put_user(from->si_lower, &to->si_lower);
2761 err |= __put_user(from->si_upper, &to->si_upper);
2762#endif
2759 break; 2763 break;
2760 case __SI_CHLD: 2764 case __SI_CHLD:
2761 err |= __put_user(from->si_pid, &to->si_pid); 2765 err |= __put_user(from->si_pid, &to->si_pid);
diff --git a/kernel/sys.c b/kernel/sys.c
index 1eaa2f0b0246..a8c9f5a7dda6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -91,6 +91,12 @@
91#ifndef SET_TSC_CTL 91#ifndef SET_TSC_CTL
92# define SET_TSC_CTL(a) (-EINVAL) 92# define SET_TSC_CTL(a) (-EINVAL)
93#endif 93#endif
94#ifndef MPX_ENABLE_MANAGEMENT
95# define MPX_ENABLE_MANAGEMENT(a) (-EINVAL)
96#endif
97#ifndef MPX_DISABLE_MANAGEMENT
98# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL)
99#endif
94 100
95/* 101/*
96 * this is where the system-wide overflow UID and GID are defined, for 102 * this is where the system-wide overflow UID and GID are defined, for
@@ -2203,6 +2209,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2203 me->mm->def_flags &= ~VM_NOHUGEPAGE; 2209 me->mm->def_flags &= ~VM_NOHUGEPAGE;
2204 up_write(&me->mm->mmap_sem); 2210 up_write(&me->mm->mmap_sem);
2205 break; 2211 break;
2212 case PR_MPX_ENABLE_MANAGEMENT:
2213 error = MPX_ENABLE_MANAGEMENT(me);
2214 break;
2215 case PR_MPX_DISABLE_MANAGEMENT:
2216 error = MPX_DISABLE_MANAGEMENT(me);
2217 break;
2206 default: 2218 default:
2207 error = -EINVAL; 2219 error = -EINVAL;
2208 break; 2220 break;
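
[The #ifndef fallbacks added above mean that architectures without MPX
support simply return -EINVAL for the new options. The x86 definitions are
expected to live in arch/x86/include/asm/processor.h (per the diffstat)
and forward to the functions in mpx.c, along these lines:]

	#ifdef CONFIG_X86_INTEL_MPX
	# define MPX_ENABLE_MANAGEMENT(tsk)	mpx_enable_management((tsk))
	# define MPX_DISABLE_MANAGEMENT(tsk)	mpx_disable_management((tsk))
	#endif
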
diff --git a/mm/mmap.c b/mm/mmap.c
index ae919891a087..b6c0a77fc1c8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2601,6 +2601,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2601 detach_vmas_to_be_unmapped(mm, vma, prev, end); 2601 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2602 unmap_region(mm, vma, prev, start, end); 2602 unmap_region(mm, vma, prev, start, end);
2603 2603
2604 arch_unmap(mm, vma, start, end);
2605
2604 /* Fix up all other VM information */ 2606 /* Fix up all other VM information */
2605 remove_vma_list(mm, vma); 2607 remove_vma_list(mm, vma);
2606 2608