aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Schwidefsky <schwidefsky@de.ibm.com> 2013-10-18 06:03:41 -0400
committer Christian Borntraeger <borntraeger@de.ibm.com> 2014-04-22 03:36:26 -0400
commit 0a61b222df75a6a69dc34816f7db2f61fee8c935 (patch)
tree 7fb9f148bc1b96bc02beed069d4490673d7798ab
parent 693ffc0802db41911ada95a3e77546f0ed1e7d00 (diff)
KVM: s390/mm: use software dirty bit detection for user dirty tracking
Switch the user dirty bit detection used for migration from the hardware provided host change-bit in the pgste to a fault based detection method. This reduces the dependency of the host on the storage key to a point where it becomes possible to enable the RCP bypass for KVM guests. The fault based dirty detection will only indicate changes caused by accesses via the guest address space. The hardware based method can detect all changes, even those caused by I/O or accesses via the kernel page table. The KVM/qemu code needs to take this into account. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
-rw-r--r-- arch/s390/include/asm/pgtable.h 135
-rw-r--r-- arch/s390/mm/pgtable.c 6
2 files changed, 59 insertions, 82 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 51b002b5667e..b2c630df0ca5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
309#define PGSTE_HC_BIT 0x00200000UL 309#define PGSTE_HC_BIT 0x00200000UL
310#define PGSTE_GR_BIT 0x00040000UL 310#define PGSTE_GR_BIT 0x00040000UL
311#define PGSTE_GC_BIT 0x00020000UL 311#define PGSTE_GC_BIT 0x00020000UL
312#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ 312#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
313#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
313 314
314#else /* CONFIG_64BIT */ 315#else /* CONFIG_64BIT */
315 316
@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
391#define PGSTE_HC_BIT 0x0020000000000000UL 392#define PGSTE_HC_BIT 0x0020000000000000UL
392#define PGSTE_GR_BIT 0x0004000000000000UL 393#define PGSTE_GR_BIT 0x0004000000000000UL
393#define PGSTE_GC_BIT 0x0002000000000000UL 394#define PGSTE_GC_BIT 0x0002000000000000UL
394#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ 395#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
396#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
395 397
396#endif /* CONFIG_64BIT */ 398#endif /* CONFIG_64BIT */
397 399
@@ -720,16 +722,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
720 address = pte_val(*ptep) & PAGE_MASK; 722 address = pte_val(*ptep) & PAGE_MASK;
721 skey = (unsigned long) page_get_storage_key(address); 723 skey = (unsigned long) page_get_storage_key(address);
722 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 724 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
723 if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
724 /* Transfer dirty + referenced bit to host bits in pgste */
725 pgste_val(pgste) |= bits << 52;
726 page_set_storage_key(address, skey ^ bits, 0);
727 } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
728 (bits & _PAGE_REFERENCED)) {
729 /* Transfer referenced bit to host bit in pgste */
730 pgste_val(pgste) |= PGSTE_HR_BIT;
731 page_reset_referenced(address);
732 }
733 /* Transfer page changed & referenced bit to guest bits in pgste */ 725 /* Transfer page changed & referenced bit to guest bits in pgste */
734 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 726 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
735 /* Copy page access key and fetch protection bit to pgste */ 727 /* Copy page access key and fetch protection bit to pgste */
@@ -740,19 +732,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
740 732
741} 733}
742 734
743static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste,
744 struct mm_struct *mm)
745{
746#ifdef CONFIG_PGSTE
747 if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
748 return pgste;
749 /* Get referenced bit from storage key */
750 if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
751 pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
752#endif
753 return pgste;
754}
755
756static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, 735static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
757 struct mm_struct *mm) 736 struct mm_struct *mm)
758{ 737{
@@ -770,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
770 * key C/R to 0. 749 * key C/R to 0.
771 */ 750 */
772 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 751 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
752 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
773 page_set_storage_key(address, nkey, 0); 753 page_set_storage_key(address, nkey, 0);
774#endif 754#endif
775} 755}
776 756
777static inline void pgste_set_pte(pte_t *ptep, pte_t entry) 757static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
778{ 758{
779 if (!MACHINE_HAS_ESOP && 759 if ((pte_val(entry) & _PAGE_PRESENT) &&
780 (pte_val(entry) & _PAGE_PRESENT) && 760 (pte_val(entry) & _PAGE_WRITE) &&
781 (pte_val(entry) & _PAGE_WRITE)) { 761 !(pte_val(entry) & _PAGE_INVALID)) {
782 /* 762 if (!MACHINE_HAS_ESOP) {
783 * Without enhanced suppression-on-protection force 763 /*
784 * the dirty bit on for all writable ptes. 764 * Without enhanced suppression-on-protection force
785 */ 765 * the dirty bit on for all writable ptes.
786 pte_val(entry) |= _PAGE_DIRTY; 766 */
787 pte_val(entry) &= ~_PAGE_PROTECT; 767 pte_val(entry) |= _PAGE_DIRTY;
768 pte_val(entry) &= ~_PAGE_PROTECT;
769 }
770 if (!(pte_val(entry) & _PAGE_PROTECT))
771 /* This pte allows write access, set user-dirty */
772 pgste_val(pgste) |= PGSTE_UC_BIT;
788 } 773 }
789 *ptep = entry; 774 *ptep = entry;
775 return pgste;
790} 776}
791 777
792/** 778/**
@@ -884,7 +870,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
884 pgste = pgste_get_lock(ptep); 870 pgste = pgste_get_lock(ptep);
885 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 871 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
886 pgste_set_key(ptep, pgste, entry, mm); 872 pgste_set_key(ptep, pgste, entry, mm);
887 pgste_set_pte(ptep, entry); 873 pgste = pgste_set_pte(ptep, pgste, entry);
888 pgste_set_unlock(ptep, pgste); 874 pgste_set_unlock(ptep, pgste);
889 } else { 875 } else {
890 if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) 876 if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1030,45 +1016,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
1030} 1016}
1031#endif 1017#endif
1032 1018
1033/*
1034 * Get (and clear) the user dirty bit for a pte.
1035 */
1036static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1037 pte_t *ptep)
1038{
1039 pgste_t pgste;
1040 int dirty = 0;
1041
1042 if (mm_has_pgste(mm)) {
1043 pgste = pgste_get_lock(ptep);
1044 pgste = pgste_update_all(ptep, pgste, mm);
1045 dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
1046 pgste_val(pgste) &= ~PGSTE_HC_BIT;
1047 pgste_set_unlock(ptep, pgste);
1048 return dirty;
1049 }
1050 return dirty;
1051}
1052
1053/*
1054 * Get (and clear) the user referenced bit for a pte.
1055 */
1056static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
1057 pte_t *ptep)
1058{
1059 pgste_t pgste;
1060 int young = 0;
1061
1062 if (mm_has_pgste(mm)) {
1063 pgste = pgste_get_lock(ptep);
1064 pgste = pgste_update_young(ptep, pgste, mm);
1065 young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
1066 pgste_val(pgste) &= ~PGSTE_HR_BIT;
1067 pgste_set_unlock(ptep, pgste);
1068 }
1069 return young;
1070}
1071
1072static inline void __ptep_ipte(unsigned long address, pte_t *ptep) 1019static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
1073{ 1020{
1074 unsigned long pto = (unsigned long) ptep; 1021 unsigned long pto = (unsigned long) ptep;
@@ -1131,6 +1078,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
1131 atomic_sub(0x10000, &mm->context.attach_count); 1078 atomic_sub(0x10000, &mm->context.attach_count);
1132} 1079}
1133 1080
1081/*
1082 * Get (and clear) the user dirty bit for a pte.
1083 */
1084static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1085 unsigned long addr,
1086 pte_t *ptep)
1087{
1088 pgste_t pgste;
1089 pte_t pte;
1090 int dirty;
1091
1092 if (!mm_has_pgste(mm))
1093 return 0;
1094 pgste = pgste_get_lock(ptep);
1095 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
1096 pgste_val(pgste) &= ~PGSTE_UC_BIT;
1097 pte = *ptep;
1098 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
1099 pgste = pgste_ipte_notify(mm, ptep, pgste);
1100 __ptep_ipte(addr, ptep);
1101 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
1102 pte_val(pte) |= _PAGE_PROTECT;
1103 else
1104 pte_val(pte) |= _PAGE_INVALID;
1105 *ptep = pte;
1106 }
1107 pgste_set_unlock(ptep, pgste);
1108 return dirty;
1109}
1110
1134#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 1111#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1135static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 1112static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1136 unsigned long addr, pte_t *ptep) 1113 unsigned long addr, pte_t *ptep)
@@ -1150,7 +1127,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1150 pte = pte_mkold(pte); 1127 pte = pte_mkold(pte);
1151 1128
1152 if (mm_has_pgste(vma->vm_mm)) { 1129 if (mm_has_pgste(vma->vm_mm)) {
1153 pgste_set_pte(ptep, pte); 1130 pgste = pgste_set_pte(ptep, pgste, pte);
1154 pgste_set_unlock(ptep, pgste); 1131 pgste_set_unlock(ptep, pgste);
1155 } else 1132 } else
1156 *ptep = pte; 1133 *ptep = pte;
@@ -1233,7 +1210,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
1233 if (mm_has_pgste(mm)) { 1210 if (mm_has_pgste(mm)) {
1234 pgste = pgste_get(ptep); 1211 pgste = pgste_get(ptep);
1235 pgste_set_key(ptep, pgste, pte, mm); 1212 pgste_set_key(ptep, pgste, pte, mm);
1236 pgste_set_pte(ptep, pte); 1213 pgste = pgste_set_pte(ptep, pgste, pte);
1237 pgste_set_unlock(ptep, pgste); 1214 pgste_set_unlock(ptep, pgste);
1238 } else 1215 } else
1239 *ptep = pte; 1216 *ptep = pte;
@@ -1314,7 +1291,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
1314 pte = pte_wrprotect(pte); 1291 pte = pte_wrprotect(pte);
1315 1292
1316 if (mm_has_pgste(mm)) { 1293 if (mm_has_pgste(mm)) {
1317 pgste_set_pte(ptep, pte); 1294 pgste = pgste_set_pte(ptep, pgste, pte);
1318 pgste_set_unlock(ptep, pgste); 1295 pgste_set_unlock(ptep, pgste);
1319 } else 1296 } else
1320 *ptep = pte; 1297 *ptep = pte;
@@ -1339,7 +1316,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
1339 ptep_flush_direct(vma->vm_mm, address, ptep); 1316 ptep_flush_direct(vma->vm_mm, address, ptep);
1340 1317
1341 if (mm_has_pgste(vma->vm_mm)) { 1318 if (mm_has_pgste(vma->vm_mm)) {
1342 pgste_set_pte(ptep, entry); 1319 pgste = pgste_set_pte(ptep, pgste, entry);
1343 pgste_set_unlock(ptep, pgste); 1320 pgste_set_unlock(ptep, pgste);
1344 } else 1321 } else
1345 *ptep = entry; 1322 *ptep = entry;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 02a8607bbeb5..1ddf975352a0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
832 } 832 }
833 spin_unlock(&gmap_notifier_lock); 833 spin_unlock(&gmap_notifier_lock);
834} 834}
835EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
835 836
836static inline int page_table_with_pgste(struct page *page) 837static inline int page_table_with_pgste(struct page *page)
837{ 838{
@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
864 atomic_set(&page->_mapcount, 0); 865 atomic_set(&page->_mapcount, 0);
865 table = (unsigned long *) page_to_phys(page); 866 table = (unsigned long *) page_to_phys(page);
866 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 867 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
867 clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, 868 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
868 PAGE_SIZE/2);
869 return table; 869 return table;
870} 870}
871 871
@@ -1005,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
1005 /* changing the guest storage key is considered a change of the page */ 1005 /* changing the guest storage key is considered a change of the page */
1006 if ((pgste_val(new) ^ pgste_val(old)) & 1006 if ((pgste_val(new) ^ pgste_val(old)) &
1007 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 1007 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
1008 pgste_val(new) |= PGSTE_HC_BIT; 1008 pgste_val(new) |= PGSTE_UC_BIT;
1009 1009
1010 pgste_set_unlock(ptep, new); 1010 pgste_set_unlock(ptep, new);
1011 pte_unmap_unlock(*ptep, ptl); 1011 pte_unmap_unlock(*ptep, ptl);