author		Martin Schwidefsky <schwidefsky@de.ibm.com>	2013-04-17 04:53:39 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2013-05-03 08:21:12 -0400
commit		d3383632d4e8e9ae747f582eaee8c2e79f828ae6
tree		9583ce35ae19ba85cc34ffe2f5b64e7d99045cbe /arch/s390
parent		56bbe686693df7edcca18d1808edd80609e63c31
s390/mm: add pte invalidation notifier for kvm
Add a notifier for kvm to get control before a page table entry is
invalidated. The notifier is only called for ptes of an address space
with pgstes that have been explicitly marked to require notification.
Kvm will use this to get control before prefix pages of virtual CPUs
are unmapped.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
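As a reader's aid (not part of the commit): a minimal sketch of how a consumer such as kvm might use the new interface. The example_* names and the one-page range are illustrative assumptions; struct gmap_notifier, gmap_register_ipte_notifier() and gmap_ipte_notify() are the ones introduced by this patch.

	/* Hypothetical notifier consumer; runs before a marked pte is invalidated. */
	static void example_pte_invalidated(struct gmap *gmap, unsigned long address)
	{
		/* "address" is the guest address whose mapping is going away;
		 * called with gmap_notifier_lock and the page table lock held. */
	}

	static struct gmap_notifier example_notifier = {
		.notifier_call = example_pte_invalidated,
	};

	static int example_watch_page(struct gmap *gmap, unsigned long gaddr)
	{
		gmap_register_ipte_notifier(&example_notifier);
		/* gaddr must be page aligned; this resolves the gmap mapping if
		 * it is missing and sets the IPTE notify bit for one guest page,
		 * returning 0 on success or -EFAULT/-ENOMEM on failure. */
		return gmap_ipte_notify(gmap, gaddr, PAGE_SIZE);
	}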
Diffstat (limited to 'arch/s390')
-rw-r--r--	arch/s390/include/asm/pgtable.h	|  66
-rw-r--r--	arch/s390/mm/pgtable.c		| 121
2 files changed, 173 insertions(+), 14 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b4622915bd15..4105b8221fdd 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -306,6 +306,7 @@ extern unsigned long MODULES_END;
 #define RCP_HC_BIT	0x00200000UL
 #define RCP_GR_BIT	0x00040000UL
 #define RCP_GC_BIT	0x00020000UL
+#define RCP_IN_BIT	0x00008000UL	/* IPTE notify bit */
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x00008000UL
@@ -373,6 +374,7 @@ extern unsigned long MODULES_END;
 #define RCP_HC_BIT	0x0020000000000000UL
 #define RCP_GR_BIT	0x0004000000000000UL
 #define RCP_GC_BIT	0x0002000000000000UL
+#define RCP_IN_BIT	0x0000800000000000UL	/* IPTE notify bit */
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x0000800000000000UL
@@ -746,30 +748,42 @@ struct gmap {
 
 /**
  * struct gmap_rmap - reverse mapping for segment table entries
- * @next: pointer to the next gmap_rmap structure in the list
+ * @gmap: pointer to the gmap_struct
  * @entry: pointer to a segment table entry
+ * @vmaddr: virtual address in the guest address space
  */
 struct gmap_rmap {
 	struct list_head list;
+	struct gmap *gmap;
 	unsigned long *entry;
+	unsigned long vmaddr;
 };
 
 /**
  * struct gmap_pgtable - gmap information attached to a page table
  * @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries maping a page table
+ * @mapper: list of segment table entries mapping a page table
  */
 struct gmap_pgtable {
 	unsigned long vmaddr;
 	struct list_head mapper;
 };
 
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+	struct list_head list;
+	void (*notifier_call)(struct gmap *gmap, unsigned long address);
+};
+
 struct gmap *gmap_alloc(struct mm_struct *mm);
 void gmap_free(struct gmap *gmap);
 void gmap_enable(struct gmap *gmap);
 void gmap_disable(struct gmap *gmap);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
-		     unsigned long to, unsigned long length);
+		     unsigned long to, unsigned long len);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_translate(unsigned long address, struct gmap *);
 unsigned long gmap_translate(unsigned long address, struct gmap *);
@@ -777,6 +791,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
 void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
+
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	if (pgste_val(pgste) & RCP_IN_BIT) {
+		pgste_val(pgste) &= ~RCP_IN_BIT;
+		gmap_do_ipte_notify(mm, addr, ptep);
+	}
+#endif
+	return pgste;
+}
+
 /*
  * Certain architectures need to do special things when PTEs
  * within a page table are directly modified. Thus, the following
@@ -1032,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	pte_t pte;
 
 	mm->context.flush_mm = 1;
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!mm_exclusive(mm))
@@ -1052,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 					   unsigned long address,
 					   pte_t *ptep)
 {
+	pgste_t pgste;
 	pte_t pte;
 
 	mm->context.flush_mm = 1;
-	if (mm_has_pgste(mm))
-		pgste_get_lock(ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!mm_exclusive(mm))
@@ -1082,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 	pgste_t pgste;
 	pte_t pte;
 
-	if (mm_has_pgste(vma->vm_mm))
+	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	__ptep_ipte(address, ptep);
@@ -1111,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	pgste_t pgste;
 	pte_t pte;
 
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
+		if (!full)
+			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
 
 	pte = *ptep;
 	if (!full)
@@ -1135,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 
 	if (pte_write(pte)) {
 		mm->context.flush_mm = 1;
-		if (mm_has_pgste(mm))
+		if (mm_has_pgste(mm)) {
 			pgste = pgste_get_lock(ptep);
+			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+		}
 
 		if (!mm_exclusive(mm))
 			__ptep_ipte(address, ptep);
@@ -1160,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 
 	if (pte_same(*ptep, entry))
 		return 0;
-	if (mm_has_pgste(vma->vm_mm))
+	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
 
 	__ptep_ipte(address, ptep);
 
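Each pte primitive above gains the same three-step guard around the actual invalidation. A condensed sketch of the pattern, with a hypothetical helper name (mm_has_pgste(), pgste_get_lock(), pgste_ipte_notify(), __ptep_ipte() and pgste_set_unlock() are the real calls):

	/* Illustrative condensation of the pattern this patch threads through
	 * ptep_get_and_clear(), ptep_clear_flush(), ptep_set_wrprotect(), ... */
	static inline void example_invalidate_pte(struct mm_struct *mm,
						  unsigned long address,
						  pte_t *ptep)
	{
		pgste_t pgste;

		if (mm_has_pgste(mm)) {
			pgste = pgste_get_lock(ptep);
			/* Fires gmap_do_ipte_notify() if RCP_IN_BIT is set and
			 * clears the bit, so a marked pte notifies exactly once,
			 * before the entry is invalidated. */
			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
		}
		__ptep_ipte(address, ptep);	/* invalidate in hardware */
		if (mm_has_pgste(mm))
			pgste_set_unlock(ptep, pgste);
	}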
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index bd954e96f51c..7805ddca833d 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -454,9 +454,8 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_translate);
 
-static int gmap_connect_pgtable(unsigned long segment,
-				unsigned long *segment_ptr,
-				struct gmap *gmap)
+static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
+				unsigned long *segment_ptr, struct gmap *gmap)
 {
 	unsigned long vmaddr;
 	struct vm_area_struct *vma;
@@ -491,7 +490,9 @@ static int gmap_connect_pgtable(unsigned long segment,
 	/* Link gmap segment table entry location to page table. */
 	page = pmd_page(*pmd);
 	mp = (struct gmap_pgtable *) page->index;
+	rmap->gmap = gmap;
 	rmap->entry = segment_ptr;
+	rmap->vmaddr = address;
 	spin_lock(&mm->page_table_lock);
 	if (*segment_ptr == segment) {
 		list_add(&rmap->list, &mp->mapper);
@@ -553,7 +554,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 		if (!(segment & _SEGMENT_ENTRY_RO))
 			/* Nothing mapped in the gmap address space. */
 			break;
-		rc = gmap_connect_pgtable(segment, segment_ptr, gmap);
+		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
 		if (rc)
 			return rc;
 	}
@@ -619,6 +620,118 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_discard);
 
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_add(&nb->list, &gmap_notifier_list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_del_init(&nb->list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+{
+	unsigned long addr;
+	spinlock_t *ptl;
+	pte_t *ptep, entry;
+	pgste_t pgste;
+	int rc = 0;
+
+	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+		return -EINVAL;
+	down_read(&gmap->mm->mmap_sem);
+	while (len) {
+		/* Convert gmap address and connect the page tables */
+		addr = __gmap_fault(start, gmap);
+		if (IS_ERR_VALUE(addr)) {
+			rc = addr;
+			break;
+		}
+		/* Get the page mapped */
+		if (get_user_pages(current, gmap->mm, addr, 1, 1, 0,
+				   NULL, NULL) != 1) {
+			rc = -EFAULT;
+			break;
+		}
+		/* Walk the process page table, lock and get pte pointer */
+		ptep = get_locked_pte(gmap->mm, addr, &ptl);
+		if (unlikely(!ptep))
+			continue;
+		/* Set notification bit in the pgste of the pte */
+		entry = *ptep;
+		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+			pgste = pgste_get_lock(ptep);
+			pgste_val(pgste) |= RCP_IN_BIT;
+			pgste_set_unlock(ptep, pgste);
+			start += PAGE_SIZE;
+			len -= PAGE_SIZE;
+		}
+		spin_unlock(ptl);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
+{
+	unsigned long segment_offset;
+	struct gmap_notifier *nb;
+	struct gmap_pgtable *mp;
+	struct gmap_rmap *rmap;
+	struct page *page;
+
+	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	segment_offset = segment_offset * (4096 / sizeof(pte_t));
+	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	spin_lock(&gmap_notifier_lock);
+	list_for_each_entry(rmap, &mp->mapper, list) {
+		list_for_each_entry(nb, &gmap_notifier_list, list)
+			nb->notifier_call(rmap->gmap,
+					  rmap->vmaddr + segment_offset);
+	}
+	spin_unlock(&gmap_notifier_lock);
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
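A note on the offset arithmetic in gmap_do_ipte_notify() above: an s390 page table holds 256 ptes of 8 bytes each, so masking the pte pointer with 255 * sizeof(pte_t) yields the byte offset of the entry within its table, and the second multiplication scales that to pte_index * 4096, the byte offset of the page within the segment. A spelled-out equivalent (illustrative, not in the patch):

	/* Equivalent form of the two-step segment_offset computation. */
	unsigned long byte_off   = ((unsigned long) pte) & (255 * sizeof(pte_t));
	unsigned long pte_index  = byte_off / sizeof(pte_t);
	unsigned long seg_offset = pte_index * 4096;	/* same result */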