aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dave Hansen <dave.hansen@linux.intel.com> 2016-07-29 12:30:12 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2016-09-09 07:02:26 -0400
commit 7d06d9c9bd813fc956b9c7bffc1b9724009983eb (patch)
tree ef8572b3d79e694cbd30ea6971dc64db93919ac4
parent e8c6226d483cb28f55cab718065ea1b7226d40e8 (diff)
mm: Implement new pkey_mprotect() system call
pkey_mprotect() is just like mprotect, except it also takes a
protection key as an argument. On systems that do not support
protection keys, it still works, but requires that key=0.
Otherwise it does exactly what mprotect does.

I expect it to get used like this, if you want to guarantee that
any mapping you create can *never* be accessed without the right
protection keys set up.

    int real_prot = PROT_READ|PROT_WRITE;
    pkey = pkey_alloc(0, PKEY_DENY_ACCESS);
    ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);

This way, there is *no* window where the mapping is accessible
since it was always either PROT_NONE or had a protection key set
that denied all access.

We settled on 'unsigned long' for the type of the key here. We
only need 4 bits on x86 today, but I figured that other
architectures might need some more space.

Semantically, we have a bit of a problem if we combine this
syscall with our previously-introduced execute-only support:
What do we do when we mix execute-only pkey use with
pkey_mprotect() use? For instance:

    pkey_mprotect(ptr, PAGE_SIZE, PROT_WRITE, 6); // set pkey=6
    mprotect(ptr, PAGE_SIZE, PROT_EXEC); // set pkey=X_ONLY_PKEY?
    mprotect(ptr, PAGE_SIZE, PROT_WRITE); // is pkey=6 again?

To solve that, we make the plain-mprotect()-initiated execute-only
support only apply to VMAs that have the default protection key (0)
set on them.

Proposed semantics:
1. protection key 0 is special and represents the default,
   "unassigned" protection key. It is always allocated.
2. mprotect() never affects a mapping's pkey_mprotect()-assigned
   protection key. A protection key of 0 (even if set explicitly)
   represents an unassigned protection key.
   2a. mprotect(PROT_EXEC) on a mapping with an assigned protection
       key may or may not result in a mapping with execute-only
       properties. pkey_mprotect() plus pkey_set() on all threads
       should be used to _guarantee_ execute-only semantics if this
       is not a strong enough semantic.
3. mprotect(PROT_EXEC) may result in an "execute-only" mapping. The
   kernel will internally attempt to allocate and dedicate a
   protection key for the purpose of execute-only mappings. This
   may not be possible in cases where there are no free protection
   keys available. It can also happen, of course, in situations
   where there is no hardware support for protection keys.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-arch@vger.kernel.org
Cc: Dave Hansen <dave@sr71.net>
Cc: arnd@arndb.de
Cc: linux-api@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: luto@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20160729163012.3DDD36C4@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/include/asm/mmu_context.h 15
-rw-r--r-- arch/x86/include/asm/pkeys.h 11
-rw-r--r-- include/linux/pkeys.h 12
-rw-r--r-- mm/mprotect.c 30
4 files changed, 45 insertions, 23 deletions
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d8abfcf524d1..af0251fc85ed 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -4,6 +4,7 @@
4#include <asm/desc.h> 4#include <asm/desc.h>
5#include <linux/atomic.h> 5#include <linux/atomic.h>
6#include <linux/mm_types.h> 6#include <linux/mm_types.h>
7#include <linux/pkeys.h>
7 8
8#include <trace/events/tlb.h> 9#include <trace/events/tlb.h>
9 10
@@ -195,16 +196,20 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
195 mpx_notify_unmap(mm, vma, start, end); 196 mpx_notify_unmap(mm, vma, start, end);
196} 197}
197 198
199#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
198static inline int vma_pkey(struct vm_area_struct *vma) 200static inline int vma_pkey(struct vm_area_struct *vma)
199{ 201{
200 u16 pkey = 0;
201#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
202 unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | 202 unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
203 VM_PKEY_BIT2 | VM_PKEY_BIT3; 203 VM_PKEY_BIT2 | VM_PKEY_BIT3;
204 pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; 204
205#endif 205 return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
206 return pkey; 206}
207#else
208static inline int vma_pkey(struct vm_area_struct *vma)
209{
210 return 0;
207} 211}
212#endif
208 213
209static inline bool __pkru_allows_pkey(u16 pkey, bool write) 214static inline bool __pkru_allows_pkey(u16 pkey, bool write)
210{ 215{
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 7b84565c916c..33777c291a85 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -1,7 +1,12 @@
1#ifndef _ASM_X86_PKEYS_H 1#ifndef _ASM_X86_PKEYS_H
2#define _ASM_X86_PKEYS_H 2#define _ASM_X86_PKEYS_H
3 3
4#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) 4#define PKEY_DEDICATED_EXECUTE_ONLY 15
5/*
6 * Consider the PKEY_DEDICATED_EXECUTE_ONLY key unavailable.
7 */
8#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? \
9 PKEY_DEDICATED_EXECUTE_ONLY : 1)
5 10
6extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, 11extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
7 unsigned long init_val); 12 unsigned long init_val);
@@ -10,7 +15,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
10 * Try to dedicate one of the protection keys to be used as an 15 * Try to dedicate one of the protection keys to be used as an
11 * execute-only protection key. 16 * execute-only protection key.
12 */ 17 */
13#define PKEY_DEDICATED_EXECUTE_ONLY 15
14extern int __execute_only_pkey(struct mm_struct *mm); 18extern int __execute_only_pkey(struct mm_struct *mm);
15static inline int execute_only_pkey(struct mm_struct *mm) 19static inline int execute_only_pkey(struct mm_struct *mm)
16{ 20{
@@ -31,4 +35,7 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
31 return __arch_override_mprotect_pkey(vma, prot, pkey); 35 return __arch_override_mprotect_pkey(vma, prot, pkey);
32} 36}
33 37
38extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
39 unsigned long init_val);
40
34#endif /*_ASM_X86_PKEYS_H */ 41#endif /*_ASM_X86_PKEYS_H */
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 1d405a2b7272..0030b4024559 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -18,16 +18,4 @@
18#define PKEY_DEDICATED_EXECUTE_ONLY 0 18#define PKEY_DEDICATED_EXECUTE_ONLY 0
19#endif /* ! CONFIG_ARCH_HAS_PKEYS */ 19#endif /* ! CONFIG_ARCH_HAS_PKEYS */
20 20
21/*
22 * This is called from mprotect_pkey().
23 *
24 * Returns true if the protection keys is valid.
25 */
26static inline bool validate_pkey(int pkey)
27{
28 if (pkey < 0)
29 return false;
30 return (pkey < arch_max_pkey());
31}
32
33#endif /* _LINUX_PKEYS_H */ 21#endif /* _LINUX_PKEYS_H */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a4830f0325fe..dd3f40a2935f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -352,8 +352,11 @@ fail:
352 return error; 352 return error;
353} 353}
354 354
355SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, 355/*
356 unsigned long, prot) 356 * pkey==-1 when doing a legacy mprotect()
357 */
358static int do_mprotect_pkey(unsigned long start, size_t len,
359 unsigned long prot, int pkey)
357{ 360{
358 unsigned long nstart, end, tmp, reqprot; 361 unsigned long nstart, end, tmp, reqprot;
359 struct vm_area_struct *vma, *prev; 362 struct vm_area_struct *vma, *prev;
@@ -361,6 +364,12 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
361 const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); 364 const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
362 const bool rier = (current->personality & READ_IMPLIES_EXEC) && 365 const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
363 (prot & PROT_READ); 366 (prot & PROT_READ);
367 /*
368 * A temporary safety check since we are not validating
369 * the pkey before we introduce the allocation code.
370 */
371 if (pkey != -1)
372 return -EINVAL;
364 373
365 prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); 374 prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
366 if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ 375 if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
@@ -409,7 +418,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
409 418
410 for (nstart = start ; ; ) { 419 for (nstart = start ; ; ) {
411 unsigned long newflags; 420 unsigned long newflags;
412 int pkey = arch_override_mprotect_pkey(vma, prot, -1); 421 int new_vma_pkey;
413 422
414 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ 423 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
415 424
@@ -417,7 +426,8 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
417 if (rier && (vma->vm_flags & VM_MAYEXEC)) 426 if (rier && (vma->vm_flags & VM_MAYEXEC))
418 prot |= PROT_EXEC; 427 prot |= PROT_EXEC;
419 428
420 newflags = calc_vm_prot_bits(prot, pkey); 429 new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
430 newflags = calc_vm_prot_bits(prot, new_vma_pkey);
421 newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); 431 newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
422 432
423 /* newflags >> 4 shift VM_MAY% in place of VM_% */ 433 /* newflags >> 4 shift VM_MAY% in place of VM_% */
@@ -454,3 +464,15 @@ out:
454 up_write(&current->mm->mmap_sem); 464 up_write(&current->mm->mmap_sem);
455 return error; 465 return error;
456} 466}
467
468SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
469 unsigned long, prot)
470{
471 return do_mprotect_pkey(start, len, prot, -1);
472}
473
474SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
475 unsigned long, prot, int, pkey)
476{
477 return do_mprotect_pkey(start, len, prot, pkey);
478}