author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2016-03-08 05:54:42 -0500
committer	Christian Borntraeger <borntraeger@de.ibm.com>	2016-06-20 03:46:49 -0400
commit		b2d73b2a0ad1c758cb0c1acb01a911744b845942
tree		1b88c5ba8120fc53fc4c9ecb7993e73a7e68c664 /arch/s390/mm
parent		8ecb1a59d6c6674bc98e4eee0c2482490748e21a
s390/mm: extended gmap pte notifier
The current gmap pte notifier forces a pte into a read-write state.
If the pte is invalidated, the gmap notifier is called to inform KVM
that the mapping will go away.
Extend this approach to allow read-write, read-only and no-access
as possible target states and call the pte notifier for any change
to the pte.
This mechanism is used to temporarily set specific access rights for
a pte without doing the heavy work of a true mprotect call.
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
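
As a usage illustration: a consumer such as KVM could drive the new
interface roughly as follows. This is a minimal sketch, not part of the
patch; the callback member name (notifier_call) and its (gmap, start,
end) signature are assumptions based on gmap_call_notifier() in the
diff below.

        /* Hypothetical consumer, for illustration only */
        static void my_pte_notifier(struct gmap *gmap, unsigned long start,
                                    unsigned long end)
        {
                /* a marked pte in [start, end) changed: drop shadow state */
        }

        static struct gmap_notifier my_nb = {
                .notifier_call = my_pte_notifier,   /* assumed member name */
        };

        static int arm_read_only_intercept(struct gmap *gmap, unsigned long gaddr)
        {
                gmap_register_pte_notifier(&my_nb);
                /* make one guest page read-only and arm the notifier; any
                 * later change to the pte invokes my_pte_notifier() */
                return gmap_mprotect_notify(gmap, gaddr, PAGE_SIZE, PROT_READ);
        }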
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--	arch/s390/mm/gmap.c	| 170
-rw-r--r--	arch/s390/mm/pgtable.c	|  54
2 files changed, 178 insertions(+), 46 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 8b56423a8297..480c076afceb 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -553,29 +553,29 @@ static LIST_HEAD(gmap_notifier_list);
 static DEFINE_SPINLOCK(gmap_notifier_lock);
 
 /**
- * gmap_register_ipte_notifier - register a pte invalidation callback
+ * gmap_register_pte_notifier - register a pte invalidation callback
  * @nb: pointer to the gmap notifier block
  */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+void gmap_register_pte_notifier(struct gmap_notifier *nb)
 {
         spin_lock(&gmap_notifier_lock);
         list_add_rcu(&nb->list, &gmap_notifier_list);
         spin_unlock(&gmap_notifier_lock);
 }
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
 
 /**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * gmap_unregister_pte_notifier - remove a pte invalidation callback
  * @nb: pointer to the gmap notifier block
  */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
 {
         spin_lock(&gmap_notifier_lock);
         list_del_rcu(&nb->list);
         spin_unlock(&gmap_notifier_lock);
         synchronize_rcu();
 }
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
 
 /**
  * gmap_call_notifier - call all registered invalidation callbacks
@@ -593,62 +593,150 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
 }
 
 /**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * gmap_table_walk - walk the gmap page tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns a table pointer for the given guest address.
+ */
+static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+                                             unsigned long gaddr)
+{
+        unsigned long *table;
+
+        table = gmap->table;
+        switch (gmap->asce & _ASCE_TYPE_MASK) {
+        case _ASCE_TYPE_REGION1:
+                table += (gaddr >> 53) & 0x7ff;
+                if (*table & _REGION_ENTRY_INVALID)
+                        return NULL;
+                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+                /* Fallthrough */
+        case _ASCE_TYPE_REGION2:
+                table += (gaddr >> 42) & 0x7ff;
+                if (*table & _REGION_ENTRY_INVALID)
+                        return NULL;
+                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+                /* Fallthrough */
+        case _ASCE_TYPE_REGION3:
+                table += (gaddr >> 31) & 0x7ff;
+                if (*table & _REGION_ENTRY_INVALID)
+                        return NULL;
+                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+                /* Fallthrough */
+        case _ASCE_TYPE_SEGMENT:
+                table += (gaddr >> 20) & 0x7ff;
+        }
+        return table;
+}
+
+/**
+ * gmap_pte_op_walk - walk the gmap page table, get the page table lock
+ *                    and return the pte pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @ptl: pointer to the spinlock pointer
+ *
+ * Returns a pointer to the locked pte for a guest address, or NULL
+ */
+static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
+                               spinlock_t **ptl)
+{
+        unsigned long *table;
+
+        /* Walk the gmap page table, lock and get pte pointer */
+        table = gmap_table_walk(gmap, gaddr);
+        if (!table || *table & _SEGMENT_ENTRY_INVALID)
+                return NULL;
+        return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
+}
+
+/**
+ * gmap_pte_op_fixup - force a page in and connect the gmap page table
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 0 if the caller can retry __gmap_translate (might fail again),
+ * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
+ * up or connecting the gmap page table.
+ */
+static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
+                             unsigned long vmaddr)
+{
+        struct mm_struct *mm = gmap->mm;
+        bool unlocked = false;
+
+        if (fixup_user_fault(current, mm, vmaddr, FAULT_FLAG_WRITE, &unlocked))
+                return -EFAULT;
+        if (unlocked)
+                /* lost mmap_sem, caller has to retry __gmap_translate */
+                return 0;
+        /* Connect the page tables */
+        return __gmap_link(gmap, gaddr, vmaddr);
+}
+
+/**
+ * gmap_pte_op_end - release the page table lock
+ * @ptl: pointer to the spinlock pointer
+ */
+static void gmap_pte_op_end(spinlock_t *ptl)
+{
+        spin_unlock(ptl);
+}
+
+/**
+ * gmap_mprotect_notify - change access rights for a range of ptes and
+ *                        call the notifier if any pte changes again
  * @gmap: pointer to guest mapping meta data structure
  * @gaddr: virtual address in the guest address space
  * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
  *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+ * Returns 0 if for each page in the given range a gmap mapping exists,
+ * the new access rights could be set and the notifier could be armed.
+ * If the gmap mapping is missing for one or more pages -EFAULT is
+ * returned. If no memory could be allocated -ENOMEM is returned.
+ * This function establishes missing page table entries.
  */
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
+                         unsigned long len, int prot)
 {
-        unsigned long addr;
+        unsigned long vmaddr;
         spinlock_t *ptl;
         pte_t *ptep;
-        bool unlocked;
         int rc = 0;
 
         if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
                 return -EINVAL;
+        if (!MACHINE_HAS_ESOP && prot == PROT_READ)
+                return -EINVAL;
         down_read(&gmap->mm->mmap_sem);
         while (len) {
-                unlocked = false;
-                /* Convert gmap address and connect the page tables */
-                addr = __gmap_translate(gmap, gaddr);
-                if (IS_ERR_VALUE(addr)) {
-                        rc = addr;
-                        break;
-                }
-                /* Get the page mapped */
-                if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
-                                     &unlocked)) {
-                        rc = -EFAULT;
-                        break;
+                rc = -EAGAIN;
+                ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
+                if (ptep) {
+                        rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot);
+                        gmap_pte_op_end(ptl);
                 }
-                /* While trying to map mmap_sem got unlocked. Let us retry */
-                if (unlocked)
+                if (rc) {
+                        vmaddr = __gmap_translate(gmap, gaddr);
+                        if (IS_ERR_VALUE(vmaddr)) {
+                                rc = vmaddr;
+                                break;
+                        }
+                        rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr);
+                        if (rc)
+                                break;
                         continue;
-                rc = __gmap_link(gmap, gaddr, addr);
-                if (rc)
-                        break;
-                /* Walk the process page table, lock and get pte pointer */
-                ptep = get_locked_pte(gmap->mm, addr, &ptl);
-                VM_BUG_ON(!ptep);
-                /* Set notification bit in the pgste of the pte */
-                if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
-                        ptep_set_notify(gmap->mm, addr, ptep);
-                        gaddr += PAGE_SIZE;
-                        len -= PAGE_SIZE;
                 }
-                pte_unmap_unlock(ptep, ptl);
+                gaddr += PAGE_SIZE;
+                len -= PAGE_SIZE;
         }
         up_read(&gmap->mm->mmap_sem);
         return rc;
 }
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
 
 /**
  * ptep_notify - call all invalidation callbacks for a specific pte.
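
The shift constants in gmap_table_walk() encode the s390 table
geometry: each level indexes 11 bits of the guest address (2048 entries
per table), with the region-first, region-second, region-third and
segment indexes taken at bits 53, 42, 31 and 20. A self-contained
sketch of just this index arithmetic (plain user-space C, for
illustration only):

        #include <stdio.h>

        /* each table level resolves 11 bits of the guest address */
        static unsigned long crste_index(unsigned long gaddr, unsigned int shift)
        {
                return (gaddr >> shift) & 0x7ff;
        }

        int main(void)
        {
                unsigned long gaddr = 0x0000123456789000UL;

                printf("r1=%lu r2=%lu r3=%lu sx=%lu\n",
                       crste_index(gaddr, 53), crste_index(gaddr, 42),
                       crste_index(gaddr, 31), crste_index(gaddr, 20));
                return 0;
        }

Note also the retry structure of gmap_mprotect_notify(): ptep_force_prot()
can fail with -EAGAIN and gmap_pte_op_fixup() may drop mmap_sem, so the
loop only advances gaddr once the protection for the current page has
actually been set.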
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fa286d0c0f2d..ab65fb11e058 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -179,9 +179,9 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
         return pgste;
 }
 
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
-                                        unsigned long addr,
-                                        pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
+                                       unsigned long addr,
+                                       pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
         if (pgste_val(pgste) & PGSTE_IN_BIT) {
@@ -199,7 +199,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
 
         if (mm_has_pgste(mm)) {
                 pgste = pgste_get_lock(ptep);
-                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
         }
         return pgste;
 }
@@ -414,6 +414,50 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
         pgste_set_unlock(ptep, pgste);
 }
 
+/**
+ * ptep_force_prot - change access rights of a locked pte
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the guest address space
+ * @ptep: pointer to the page table entry
+ * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if the access rights were changed and -EAGAIN if the current
+ * and requested access rights are incompatible.
+ */
+int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
+                    pte_t *ptep, int prot)
+{
+        pte_t entry;
+        pgste_t pgste;
+        int pte_i, pte_p;
+
+        pgste = pgste_get_lock(ptep);
+        entry = *ptep;
+        /* Check pte entry after all locks have been acquired */
+        pte_i = pte_val(entry) & _PAGE_INVALID;
+        pte_p = pte_val(entry) & _PAGE_PROTECT;
+        if ((pte_i && (prot != PROT_NONE)) ||
+            (pte_p && (prot & PROT_WRITE))) {
+                pgste_set_unlock(ptep, pgste);
+                return -EAGAIN;
+        }
+        /* Change access rights and set the pgste notification bit */
+        if (prot == PROT_NONE && !pte_i) {
+                ptep_flush_direct(mm, addr, ptep);
+                pgste = pgste_update_all(entry, pgste, mm);
+                pte_val(entry) |= _PAGE_INVALID;
+        }
+        if (prot == PROT_READ && !pte_p) {
+                ptep_flush_direct(mm, addr, ptep);
+                pte_val(entry) &= ~_PAGE_INVALID;
+                pte_val(entry) |= _PAGE_PROTECT;
+        }
+        pgste_val(pgste) |= PGSTE_IN_BIT;
+        pgste = pgste_set_pte(ptep, pgste, entry);
+        pgste_set_unlock(ptep, pgste);
+        return 0;
+}
+
 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 {
         if (!non_swap_entry(entry))
@@ -483,7 +527,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
         pgste_val(pgste) &= ~PGSTE_UC_BIT;
         pte = *ptep;
         if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
                 __ptep_ipte(addr, ptep);
                 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                         pte_val(pte) |= _PAGE_PROTECT;
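
The state logic of the new ptep_force_prot() condenses into a small
host-side model. This is a sketch only: PAGE_INVALID and PAGE_PROTECT
are illustrative stand-ins for _PAGE_INVALID/_PAGE_PROTECT, and the
real function additionally flushes the TLB, updates the pgste and arms
PGSTE_IN_BIT under the pgste lock.

        #include <stdio.h>
        #include <sys/mman.h>   /* PROT_NONE, PROT_READ, PROT_WRITE */

        #define PAGE_INVALID 0x1        /* stand-in for _PAGE_INVALID */
        #define PAGE_PROTECT 0x2        /* stand-in for _PAGE_PROTECT */

        /* condensed model of the ptep_force_prot() checks and transitions */
        static int force_prot(unsigned int *pte, int prot)
        {
                int pte_i = *pte & PAGE_INVALID;
                int pte_p = *pte & PAGE_PROTECT;

                /* current and requested rights incompatible: -EAGAIN (-1 here) */
                if ((pte_i && prot != PROT_NONE) || (pte_p && (prot & PROT_WRITE)))
                        return -1;
                if (prot == PROT_NONE && !pte_i)
                        *pte |= PAGE_INVALID;   /* no access: invalidate */
                if (prot == PROT_READ && !pte_p) {
                        *pte &= ~PAGE_INVALID;  /* read-only: protect */
                        *pte |= PAGE_PROTECT;
                }
                return 0;       /* kernel also arms PGSTE_IN_BIT here */
        }

        int main(void)
        {
                unsigned int pte = 0;

                printf("%d\n", force_prot(&pte, PROT_READ));  /* 0: now read-only */
                printf("%d\n", force_prot(&pte, PROT_WRITE)); /* -1: incompatible */
                return 0;
        }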