aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2016-03-08 05:54:42 -0500
committerChristian Borntraeger <borntraeger@de.ibm.com>2016-06-20 03:46:49 -0400
commitb2d73b2a0ad1c758cb0c1acb01a911744b845942 (patch)
tree1b88c5ba8120fc53fc4c9ecb7993e73a7e68c664
parent8ecb1a59d6c6674bc98e4eee0c2482490748e21a (diff)
s390/mm: extended gmap pte notifier
The current gmap pte notifier forces a pte into a read-write state. If the pte is invalidated the gmap notifier is called to inform KVM that the mapping will go away. Extend this approach to allow read-write, read-only and no-access as possible target states and call the pte notifier for any change to the pte. This mechanism is used to temporarily set specific access rights for a pte without doing the heavy work of a true mprotect call. Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
-rw-r--r--arch/s390/include/asm/gmap.h9
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/s390/kvm/kvm-s390.c13
-rw-r--r--arch/s390/mm/gmap.c170
-rw-r--r--arch/s390/mm/pgtable.c54
5 files changed, 193 insertions, 55 deletions
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 2cf49624af99..6897a0919446 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -59,8 +59,11 @@ void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
59void __gmap_zap(struct gmap *, unsigned long gaddr); 59void __gmap_zap(struct gmap *, unsigned long gaddr);
60void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); 60void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
61 61
62void gmap_register_ipte_notifier(struct gmap_notifier *); 62void gmap_register_pte_notifier(struct gmap_notifier *);
63void gmap_unregister_ipte_notifier(struct gmap_notifier *); 63void gmap_unregister_pte_notifier(struct gmap_notifier *);
64int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); 64void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *);
65
66int gmap_mprotect_notify(struct gmap *, unsigned long start,
67 unsigned long len, int prot);
65 68
66#endif /* _ASM_S390_GMAP_H */ 69#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 9951e7e59756..35dde6afffcf 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -886,6 +886,8 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
886 pte_t *ptep, pte_t entry); 886 pte_t *ptep, pte_t entry);
887void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 887void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
888void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 888void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
889int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
890 pte_t *ptep, int prot);
889void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, 891void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
890 pte_t *ptep , int reset); 892 pte_t *ptep , int reset);
891void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 893void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 67f1b6b4c060..b6e7f66f0f01 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -21,6 +21,7 @@
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/kvm.h> 22#include <linux/kvm.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/mman.h>
24#include <linux/module.h> 25#include <linux/module.h>
25#include <linux/random.h> 26#include <linux/random.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -185,7 +186,7 @@ static struct notifier_block kvm_clock_notifier = {
185int kvm_arch_hardware_setup(void) 186int kvm_arch_hardware_setup(void)
186{ 187{
187 gmap_notifier.notifier_call = kvm_gmap_notifier; 188 gmap_notifier.notifier_call = kvm_gmap_notifier;
188 gmap_register_ipte_notifier(&gmap_notifier); 189 gmap_register_pte_notifier(&gmap_notifier);
189 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 190 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
190 &kvm_clock_notifier); 191 &kvm_clock_notifier);
191 return 0; 192 return 0;
@@ -193,7 +194,7 @@ int kvm_arch_hardware_setup(void)
193 194
194void kvm_arch_hardware_unsetup(void) 195void kvm_arch_hardware_unsetup(void)
195{ 196{
196 gmap_unregister_ipte_notifier(&gmap_notifier); 197 gmap_unregister_pte_notifier(&gmap_notifier);
197 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 198 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
198 &kvm_clock_notifier); 199 &kvm_clock_notifier);
199} 200}
@@ -2272,16 +2273,16 @@ retry:
2272 return 0; 2273 return 0;
2273 /* 2274 /*
2274 * We use MMU_RELOAD just to re-arm the ipte notifier for the 2275 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2275 * guest prefix page. gmap_ipte_notify will wait on the ptl lock. 2276 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2276 * This ensures that the ipte instruction for this request has 2277 * This ensures that the ipte instruction for this request has
2277 * already finished. We might race against a second unmapper that 2278 * already finished. We might race against a second unmapper that
2278 * wants to set the blocking bit. Lets just retry the request loop. 2279 * wants to set the blocking bit. Lets just retry the request loop.
2279 */ 2280 */
2280 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 2281 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2281 int rc; 2282 int rc;
2282 rc = gmap_ipte_notify(vcpu->arch.gmap, 2283 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2283 kvm_s390_get_prefix(vcpu), 2284 kvm_s390_get_prefix(vcpu),
2284 PAGE_SIZE * 2); 2285 PAGE_SIZE * 2, PROT_WRITE);
2285 if (rc) 2286 if (rc)
2286 return rc; 2287 return rc;
2287 goto retry; 2288 goto retry;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 8b56423a8297..480c076afceb 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -553,29 +553,29 @@ static LIST_HEAD(gmap_notifier_list);
553static DEFINE_SPINLOCK(gmap_notifier_lock); 553static DEFINE_SPINLOCK(gmap_notifier_lock);
554 554
555/** 555/**
556 * gmap_register_ipte_notifier - register a pte invalidation callback 556 * gmap_register_pte_notifier - register a pte invalidation callback
557 * @nb: pointer to the gmap notifier block 557 * @nb: pointer to the gmap notifier block
558 */ 558 */
559void gmap_register_ipte_notifier(struct gmap_notifier *nb) 559void gmap_register_pte_notifier(struct gmap_notifier *nb)
560{ 560{
561 spin_lock(&gmap_notifier_lock); 561 spin_lock(&gmap_notifier_lock);
562 list_add_rcu(&nb->list, &gmap_notifier_list); 562 list_add_rcu(&nb->list, &gmap_notifier_list);
563 spin_unlock(&gmap_notifier_lock); 563 spin_unlock(&gmap_notifier_lock);
564} 564}
565EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); 565EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
566 566
567/** 567/**
568 * gmap_unregister_ipte_notifier - remove a pte invalidation callback 568 * gmap_unregister_pte_notifier - remove a pte invalidation callback
569 * @nb: pointer to the gmap notifier block 569 * @nb: pointer to the gmap notifier block
570 */ 570 */
571void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) 571void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
572{ 572{
573 spin_lock(&gmap_notifier_lock); 573 spin_lock(&gmap_notifier_lock);
574 list_del_rcu(&nb->list); 574 list_del_rcu(&nb->list);
575 spin_unlock(&gmap_notifier_lock); 575 spin_unlock(&gmap_notifier_lock);
576 synchronize_rcu(); 576 synchronize_rcu();
577} 577}
578EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); 578EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
579 579
580/** 580/**
581 * gmap_call_notifier - call all registered invalidation callbacks 581 * gmap_call_notifier - call all registered invalidation callbacks
@@ -593,62 +593,150 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
593} 593}
594 594
595/** 595/**
596 * gmap_ipte_notify - mark a range of ptes for invalidation notification 596 * gmap_table_walk - walk the gmap page tables
597 * @gmap: pointer to guest mapping meta data structure
598 * @gaddr: virtual address in the guest address space
599 *
600 * Returns a table pointer for the given guest address.
601 */
602static inline unsigned long *gmap_table_walk(struct gmap *gmap,
603 unsigned long gaddr)
604{
605 unsigned long *table;
606
607 table = gmap->table;
608 switch (gmap->asce & _ASCE_TYPE_MASK) {
609 case _ASCE_TYPE_REGION1:
610 table += (gaddr >> 53) & 0x7ff;
611 if (*table & _REGION_ENTRY_INVALID)
612 return NULL;
613 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
614 /* Fallthrough */
615 case _ASCE_TYPE_REGION2:
616 table += (gaddr >> 42) & 0x7ff;
617 if (*table & _REGION_ENTRY_INVALID)
618 return NULL;
619 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
620 /* Fallthrough */
621 case _ASCE_TYPE_REGION3:
622 table += (gaddr >> 31) & 0x7ff;
623 if (*table & _REGION_ENTRY_INVALID)
624 return NULL;
625 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
626 /* Fallthrough */
627 case _ASCE_TYPE_SEGMENT:
628 table += (gaddr >> 20) & 0x7ff;
629 }
630 return table;
631}
632
633/**
634 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
635 * and return the pte pointer
636 * @gmap: pointer to guest mapping meta data structure
637 * @gaddr: virtual address in the guest address space
638 * @ptl: pointer to the spinlock pointer
639 *
640 * Returns a pointer to the locked pte for a guest address, or NULL
641 */
642static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
643 spinlock_t **ptl)
644{
645 unsigned long *table;
646
647 /* Walk the gmap page table, lock and get pte pointer */
648 table = gmap_table_walk(gmap, gaddr);
649 if (!table || *table & _SEGMENT_ENTRY_INVALID)
650 return NULL;
651 return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
652}
653
654/**
655 * gmap_pte_op_fixup - force a page in and connect the gmap page table
656 * @gmap: pointer to guest mapping meta data structure
657 * @gaddr: virtual address in the guest address space
658 * @vmaddr: address in the host process address space
659 *
660 * Returns 0 if the caller can retry __gmap_translate (might fail again),
661 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
662 * up or connecting the gmap page table.
663 */
664static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
665 unsigned long vmaddr)
666{
667 struct mm_struct *mm = gmap->mm;
668 bool unlocked = false;
669
670 if (fixup_user_fault(current, mm, vmaddr, FAULT_FLAG_WRITE, &unlocked))
671 return -EFAULT;
672 if (unlocked)
673 /* lost mmap_sem, caller has to retry __gmap_translate */
674 return 0;
675 /* Connect the page tables */
676 return __gmap_link(gmap, gaddr, vmaddr);
677}
678
679/**
680 * gmap_pte_op_end - release the page table lock
681 * @ptl: pointer to the spinlock pointer
682 */
683static void gmap_pte_op_end(spinlock_t *ptl)
684{
685 spin_unlock(ptl);
686}
687
688/**
689 * gmap_mprotect_notify - change access rights for a range of ptes and
690 * call the notifier if any pte changes again
597 * @gmap: pointer to guest mapping meta data structure 691 * @gmap: pointer to guest mapping meta data structure
598 * @gaddr: virtual address in the guest address space 692 * @gaddr: virtual address in the guest address space
599 * @len: size of area 693 * @len: size of area
694 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
600 * 695 *
601 * Returns 0 if for each page in the given range a gmap mapping exists and 696 * Returns 0 if for each page in the given range a gmap mapping exists,
602 * the invalidation notification could be set. If the gmap mapping is missing 697 * the new access rights could be set and the notifier could be armed.
603 * for one or more pages -EFAULT is returned. If no memory could be allocated 698 * If the gmap mapping is missing for one or more pages -EFAULT is
604 * -ENOMEM is returned. This function establishes missing page table entries. 699 * returned. If no memory could be allocated -ENOMEM is returned.
700 * This function establishes missing page table entries.
605 */ 701 */
606int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) 702int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
703 unsigned long len, int prot)
607{ 704{
608 unsigned long addr; 705 unsigned long vmaddr;
609 spinlock_t *ptl; 706 spinlock_t *ptl;
610 pte_t *ptep; 707 pte_t *ptep;
611 bool unlocked;
612 int rc = 0; 708 int rc = 0;
613 709
614 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) 710 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
615 return -EINVAL; 711 return -EINVAL;
712 if (!MACHINE_HAS_ESOP && prot == PROT_READ)
713 return -EINVAL;
616 down_read(&gmap->mm->mmap_sem); 714 down_read(&gmap->mm->mmap_sem);
617 while (len) { 715 while (len) {
618 unlocked = false; 716 rc = -EAGAIN;
619 /* Convert gmap address and connect the page tables */ 717 ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
620 addr = __gmap_translate(gmap, gaddr); 718 if (ptep) {
621 if (IS_ERR_VALUE(addr)) { 719 rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot);
622 rc = addr; 720 gmap_pte_op_end(ptl);
623 break;
624 }
625 /* Get the page mapped */
626 if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
627 &unlocked)) {
628 rc = -EFAULT;
629 break;
630 } 721 }
631 /* While trying to map mmap_sem got unlocked. Let us retry */ 722 if (rc) {
632 if (unlocked) 723 vmaddr = __gmap_translate(gmap, gaddr);
724 if (IS_ERR_VALUE(vmaddr)) {
725 rc = vmaddr;
726 break;
727 }
728 rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr);
729 if (rc)
730 break;
633 continue; 731 continue;
634 rc = __gmap_link(gmap, gaddr, addr);
635 if (rc)
636 break;
637 /* Walk the process page table, lock and get pte pointer */
638 ptep = get_locked_pte(gmap->mm, addr, &ptl);
639 VM_BUG_ON(!ptep);
640 /* Set notification bit in the pgste of the pte */
641 if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
642 ptep_set_notify(gmap->mm, addr, ptep);
643 gaddr += PAGE_SIZE;
644 len -= PAGE_SIZE;
645 } 732 }
646 pte_unmap_unlock(ptep, ptl); 733 gaddr += PAGE_SIZE;
734 len -= PAGE_SIZE;
647 } 735 }
648 up_read(&gmap->mm->mmap_sem); 736 up_read(&gmap->mm->mmap_sem);
649 return rc; 737 return rc;
650} 738}
651EXPORT_SYMBOL_GPL(gmap_ipte_notify); 739EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
652 740
653/** 741/**
654 * ptep_notify - call all invalidation callbacks for a specific pte. 742 * ptep_notify - call all invalidation callbacks for a specific pte.
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fa286d0c0f2d..ab65fb11e058 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -179,9 +179,9 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
179 return pgste; 179 return pgste;
180} 180}
181 181
182static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, 182static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
183 unsigned long addr, 183 unsigned long addr,
184 pte_t *ptep, pgste_t pgste) 184 pte_t *ptep, pgste_t pgste)
185{ 185{
186#ifdef CONFIG_PGSTE 186#ifdef CONFIG_PGSTE
187 if (pgste_val(pgste) & PGSTE_IN_BIT) { 187 if (pgste_val(pgste) & PGSTE_IN_BIT) {
@@ -199,7 +199,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
199 199
200 if (mm_has_pgste(mm)) { 200 if (mm_has_pgste(mm)) {
201 pgste = pgste_get_lock(ptep); 201 pgste = pgste_get_lock(ptep);
202 pgste = pgste_ipte_notify(mm, addr, ptep, pgste); 202 pgste = pgste_pte_notify(mm, addr, ptep, pgste);
203 } 203 }
204 return pgste; 204 return pgste;
205} 205}
@@ -414,6 +414,50 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
414 pgste_set_unlock(ptep, pgste); 414 pgste_set_unlock(ptep, pgste);
415} 415}
416 416
417/**
418 * ptep_force_prot - change access rights of a locked pte
419 * @mm: pointer to the process mm_struct
420 * @addr: virtual address in the guest address space
421 * @ptep: pointer to the page table entry
422 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
423 *
424 * Returns 0 if the access rights were changed and -EAGAIN if the current
425 * and requested access rights are incompatible.
426 */
427int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
428 pte_t *ptep, int prot)
429{
430 pte_t entry;
431 pgste_t pgste;
432 int pte_i, pte_p;
433
434 pgste = pgste_get_lock(ptep);
435 entry = *ptep;
436 /* Check pte entry after all locks have been acquired */
437 pte_i = pte_val(entry) & _PAGE_INVALID;
438 pte_p = pte_val(entry) & _PAGE_PROTECT;
439 if ((pte_i && (prot != PROT_NONE)) ||
440 (pte_p && (prot & PROT_WRITE))) {
441 pgste_set_unlock(ptep, pgste);
442 return -EAGAIN;
443 }
444 /* Change access rights and set the pgste notification bit */
445 if (prot == PROT_NONE && !pte_i) {
446 ptep_flush_direct(mm, addr, ptep);
447 pgste = pgste_update_all(entry, pgste, mm);
448 pte_val(entry) |= _PAGE_INVALID;
449 }
450 if (prot == PROT_READ && !pte_p) {
451 ptep_flush_direct(mm, addr, ptep);
452 pte_val(entry) &= ~_PAGE_INVALID;
453 pte_val(entry) |= _PAGE_PROTECT;
454 }
455 pgste_val(pgste) |= PGSTE_IN_BIT;
456 pgste = pgste_set_pte(ptep, pgste, entry);
457 pgste_set_unlock(ptep, pgste);
458 return 0;
459}
460
417static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) 461static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
418{ 462{
419 if (!non_swap_entry(entry)) 463 if (!non_swap_entry(entry))
@@ -483,7 +527,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
483 pgste_val(pgste) &= ~PGSTE_UC_BIT; 527 pgste_val(pgste) &= ~PGSTE_UC_BIT;
484 pte = *ptep; 528 pte = *ptep;
485 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 529 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
486 pgste = pgste_ipte_notify(mm, addr, ptep, pgste); 530 pgste = pgste_pte_notify(mm, addr, ptep, pgste);
487 __ptep_ipte(addr, ptep); 531 __ptep_ipte(addr, ptep);
488 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 532 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
489 pte_val(pte) |= _PAGE_PROTECT; 533 pte_val(pte) |= _PAGE_PROTECT;