about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
author Dave Hansen <dave.hansen@linux.intel.com> 2016-07-29 12:30:12 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2016-09-09 07:02:26 -0400
commit 7d06d9c9bd813fc956b9c7bffc1b9724009983eb (patch)
tree ef8572b3d79e694cbd30ea6971dc64db93919ac4 /mm
parent e8c6226d483cb28f55cab718065ea1b7226d40e8 (diff)
mm: Implement new pkey_mprotect() system call
pkey_mprotect() is just like mprotect, except it also takes a protection key as an argument. On systems that do not support protection keys, it still works, but requires that key=0. Otherwise it does exactly what mprotect does.

I expect it to get used like this, if you want to guarantee that any mapping you create can *never* be accessed without the right protection keys set up.

	int real_prot = PROT_READ|PROT_WRITE;
	pkey = pkey_alloc(0, PKEY_DENY_ACCESS);
	ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);

This way, there is *no* window where the mapping is accessible since it was always either PROT_NONE or had a protection key set that denied all access.

We settled on 'unsigned long' for the type of the key here. We only need 4 bits on x86 today, but I figured that other architectures might need some more space.

Semantically, we have a bit of a problem if we combine this syscall with our previously-introduced execute-only support: What do we do when we mix execute-only pkey use with pkey_mprotect() use? For instance:

	pkey_mprotect(ptr, PAGE_SIZE, PROT_WRITE, 6); // set pkey=6
	mprotect(ptr, PAGE_SIZE, PROT_EXEC); // set pkey=X_ONLY_PKEY?
	mprotect(ptr, PAGE_SIZE, PROT_WRITE); // is pkey=6 again?

To solve that, we make the plain-mprotect()-initiated execute-only support only apply to VMAs that have the default protection key (0) set on them.

Proposed semantics:
1. protection key 0 is special and represents the default, "unassigned" protection key. It is always allocated.
2. mprotect() never affects a mapping's pkey_mprotect()-assigned protection key. A protection key of 0 (even if set explicitly) represents an unassigned protection key.
   2a. mprotect(PROT_EXEC) on a mapping with an assigned protection key may or may not result in a mapping with execute-only properties. pkey_mprotect() plus pkey_set() on all threads should be used to _guarantee_ execute-only semantics if this is not a strong enough semantic.
3. mprotect(PROT_EXEC) may result in an "execute-only" mapping. The kernel will internally attempt to allocate and dedicate a protection key for the purpose of execute-only mappings. This may not be possible in cases where there are no free protection keys available. It can also happen, of course, in situations where there is no hardware support for protection keys.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-arch@vger.kernel.org
Cc: Dave Hansen <dave@sr71.net>
Cc: arnd@arndb.de
Cc: linux-api@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: luto@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20160729163012.3DDD36C4@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'mm')
-rw-r--r-- mm/mprotect.c 30
1 files changed, 26 insertions, 4 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a4830f0325fe..dd3f40a2935f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -352,8 +352,11 @@ fail:
352 return error; 352 return error;
353} 353}
354 354
355SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, 355/*
356 unsigned long, prot) 356 * pkey==-1 when doing a legacy mprotect()
357 */
358static int do_mprotect_pkey(unsigned long start, size_t len,
359 unsigned long prot, int pkey)
357{ 360{
358 unsigned long nstart, end, tmp, reqprot; 361 unsigned long nstart, end, tmp, reqprot;
359 struct vm_area_struct *vma, *prev; 362 struct vm_area_struct *vma, *prev;
@@ -361,6 +364,12 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
361 const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); 364 const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
362 const bool rier = (current->personality & READ_IMPLIES_EXEC) && 365 const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
363 (prot & PROT_READ); 366 (prot & PROT_READ);
367 /*
368 * A temporary safety check since we are not validating
369 * the pkey before we introduce the allocation code.
370 */
371 if (pkey != -1)
372 return -EINVAL;
364 373
365 prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); 374 prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
366 if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ 375 if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
@@ -409,7 +418,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
409 418
410 for (nstart = start ; ; ) { 419 for (nstart = start ; ; ) {
411 unsigned long newflags; 420 unsigned long newflags;
412 int pkey = arch_override_mprotect_pkey(vma, prot, -1); 421 int new_vma_pkey;
413 422
414 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ 423 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
415 424
@@ -417,7 +426,8 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
417 if (rier && (vma->vm_flags & VM_MAYEXEC)) 426 if (rier && (vma->vm_flags & VM_MAYEXEC))
418 prot |= PROT_EXEC; 427 prot |= PROT_EXEC;
419 428
420 newflags = calc_vm_prot_bits(prot, pkey); 429 new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
430 newflags = calc_vm_prot_bits(prot, new_vma_pkey);
421 newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); 431 newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
422 432
423 /* newflags >> 4 shift VM_MAY% in place of VM_% */ 433 /* newflags >> 4 shift VM_MAY% in place of VM_% */
@@ -454,3 +464,15 @@ out:
454 up_write(&current->mm->mmap_sem); 464 up_write(&current->mm->mmap_sem);
455 return error; 465 return error;
456} 466}
467
468SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
469 unsigned long, prot)
470{
471 return do_mprotect_pkey(start, len, prot, -1);
472}
473
474SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
475 unsigned long, prot, int, pkey)
476{
477 return do_mprotect_pkey(start, len, prot, pkey);
478}