author    Martin Schwidefsky <schwidefsky@de.ibm.com>  2016-03-08 05:08:09 -0500
committer Martin Schwidefsky <schwidefsky@de.ibm.com>  2016-03-08 09:00:12 -0500
commit    ebde765c0e85f48534f98779b22349bf00761b61
tree      af05d33a2b90519d8296fe248bb8bf70b3355259
parent    988b86e69ded17f0f1209fd3ef1c4c7f1567dcc1
s390/mm: uninline ptep_xxx functions from pgtable.h
The code in the various ptep_xxx functions has grown quite large,
consolidate them to four out-of-line functions:
  ptep_xchg_direct to exchange a pte with another with immediate flushing
  ptep_xchg_lazy to exchange a pte with another in a batched update
  ptep_modify_prot_start to begin a protection flags update
  ptep_modify_prot_commit to commit a protection flags update

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
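For orientation, the inline helpers in pgtable.h become thin wrappers around the four new out-of-line primitives. A condensed view of the resulting interface, excerpted from the hunks below (only ptep_get_and_clear is shown as a representative wrapper):

	/* new out-of-line primitives declared in asm/pgtable.h */
	pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
	pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
	pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
	void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);

	/* the remaining inline helpers reduce to thin wrappers, e.g.: */
	static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
					       unsigned long addr, pte_t *ptep)
	{
		return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
	}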
-rw-r--r--  arch/s390/include/asm/pgtable.h  374
-rw-r--r--  arch/s390/kvm/kvm-s390.c           2
-rw-r--r--  arch/s390/mm/pgtable.c           295
3 files changed, 318 insertions, 353 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7be9ae808a8b..d102c4e23f91 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -680,69 +680,8 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 #endif
 }
 
-static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
-				       struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
-	unsigned long address, bits, skey;
-
-	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
-		return pgste;
-	address = pte_val(pte) & PAGE_MASK;
-	skey = (unsigned long) page_get_storage_key(address);
-	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-	/* Transfer page changed & referenced bit to guest bits in pgste */
-	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
-	/* Copy page access key and fetch protection bit to pgste */
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
-	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-#endif
-	return pgste;
-
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
-				 struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
-	unsigned long address;
-	unsigned long nkey;
-
-	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
-		return;
-	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
-	address = pte_val(entry) & PAGE_MASK;
-	/*
-	 * Set page access key and fetch protection bit from pgste.
-	 * The guest C/R information is still in the PGSTE, set real
-	 * key C/R to 0.
-	 */
-	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
-	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
-	page_set_storage_key(address, nkey, 0);
-#endif
-}
-
-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
-{
-	if ((pte_val(entry) & _PAGE_PRESENT) &&
-	    (pte_val(entry) & _PAGE_WRITE) &&
-	    !(pte_val(entry) & _PAGE_INVALID)) {
-		if (!MACHINE_HAS_ESOP) {
-			/*
-			 * Without enhanced suppression-on-protection force
-			 * the dirty bit on for all writable ptes.
-			 */
-			pte_val(entry) |= _PAGE_DIRTY;
-			pte_val(entry) &= ~_PAGE_PROTECT;
-		}
-		if (!(pte_val(entry) & _PAGE_PROTECT))
-			/* This pte allows write access, set user-dirty */
-			pgste_val(pgste) |= PGSTE_UC_BIT;
-	}
-	*ptep = entry;
-	return pgste;
-}
+bool pgste_test_and_clear_dirty(struct mm_struct *, unsigned long address);
+void ptep_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
 
 /**
  * struct gmap_struct - guest address space
@@ -791,47 +730,11 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
 int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
 void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
 void __gmap_zap(struct gmap *, unsigned long gaddr);
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
 
 
 void gmap_register_ipte_notifier(struct gmap_notifier *);
 void gmap_unregister_ipte_notifier(struct gmap_notifier *);
 int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
-
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	if (pgste_val(pgste) & PGSTE_IN_BIT) {
-		pgste_val(pgste) &= ~PGSTE_IN_BIT;
-		gmap_do_ipte_notify(mm, addr, ptep);
-	}
-#endif
-	return pgste;
-}
-
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t entry)
-{
-	pgste_t pgste;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
-		pgste_set_key(ptep, pgste, entry, mm);
-		pgste = pgste_set_pte(ptep, pgste, entry);
-		pgste_set_unlock(ptep, pgste);
-	} else {
-		*ptep = entry;
-	}
-}
 
 /*
  * query functions pte_write/pte_dirty/pte_young only work if
@@ -998,95 +901,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
 	} while (nr != 255);
 }
 
-static inline void ptep_flush_direct(struct mm_struct *mm,
-				     unsigned long address, pte_t *ptep)
-{
-	int active, count;
-
-	if (pte_val(*ptep) & _PAGE_INVALID)
-		return;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte_local(address, ptep);
-	else
-		__ptep_ipte(address, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
-}
-
-static inline void ptep_flush_lazy(struct mm_struct *mm,
-				   unsigned long address, pte_t *ptep)
-{
-	int active, count;
-
-	if (pte_val(*ptep) & _PAGE_INVALID)
-		return;
-	active = (mm == current->active_mm) ? 1 : 0;
-	count = atomic_add_return(0x10000, &mm->context.attach_count);
-	if ((count & 0xffff) <= active) {
-		pte_val(*ptep) |= _PAGE_INVALID;
-		mm->context.flush_mm = 1;
-	} else
-		__ptep_ipte(address, ptep);
-	atomic_sub(0x10000, &mm->context.attach_count);
-}
-
 /*
- * Get (and clear) the user dirty bit for a pte.
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ *   1) ptep_get_and_clear
+ *   2) set_pte_at
+ *   3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
  */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
-						 unsigned long addr,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	pte_t pte;
-	int dirty;
-
-	if (!mm_has_pgste(mm))
-		return 0;
-	pgste = pgste_get_lock(ptep);
-	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
-	pgste_val(pgste) &= ~PGSTE_UC_BIT;
-	pte = *ptep;
-	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep);
-		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
-			pte_val(pte) |= _PAGE_PROTECT;
-		else
-			pte_val(pte) |= _PAGE_INVALID;
-		*ptep = pte;
-	}
-	pgste_set_unlock(ptep, pgste);
-	return dirty;
-}
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte;
-	int young;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_direct(vma->vm_mm, addr, ptep);
-	young = pte_young(pte);
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_update_all(pte, pgste, vma->vm_mm);
-		pgste = pgste_set_pte(ptep, pgste, pte_mkold(pte));
-		pgste_set_unlock(ptep, pgste);
-	} else
-		*ptep = pte_mkold(pte);
+	pte_t pte = *ptep;
 
-	return young;
+	pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return pte_young(pte);
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1096,104 +934,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 	return ptep_test_and_clear_young(vma, address, ptep);
 }
 
-/*
- * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
- * both clear the TLB for the unmapped pte. The reason is that
- * ptep_get_and_clear is used in common code (e.g. change_pte_range)
- * to modify an active pte. The sequence is
- *   1) ptep_get_and_clear
- *   2) set_pte_at
- *   3) flush_tlb_range
- * On s390 the tlb needs to get flushed with the modification of the pte
- * if the pte is active. The only way how this can be implemented is to
- * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
- * is a nop.
- */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long address, pte_t *ptep)
+				       unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_lazy(mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(pte, pgste, mm);
-		pgste_set_unlock(ptep, pgste);
-	}
-	return pte;
+	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
-					   unsigned long address,
-					   pte_t *ptep)
-{
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_lazy(mm, address, ptep);
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(pte, pgste, mm);
-		pgste_set(ptep, pgste);
-	}
-	return pte;
-}
-
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
-					   unsigned long address,
-					   pte_t *ptep, pte_t pte)
-{
-	pgste_t pgste;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get(ptep);
-		pgste_set_key(ptep, pgste, pte, mm);
-		pgste = pgste_set_pte(ptep, pgste, pte);
-		pgste_set_unlock(ptep, pgste);
-	} else
-		*ptep = pte;
-}
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
-				     unsigned long address, pte_t *ptep)
+				     unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	ptep_flush_direct(vma->vm_mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (mm_has_pgste(vma->vm_mm)) {
-		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
-		    _PGSTE_GPS_USAGE_UNUSED)
-			pte_val(pte) |= _PAGE_UNUSED;
-		pgste = pgste_update_all(pte, pgste, vma->vm_mm);
-		pgste_set_unlock(ptep, pgste);
-	}
-	return pte;
+	return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 /*
@@ -1205,80 +961,52 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long address,
+					    unsigned long addr,
 					    pte_t *ptep, int full)
 {
-	pgste_t pgste;
-	pte_t pte;
-
-	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-	}
-
-	pte = *ptep;
-	if (!full)
-		ptep_flush_lazy(mm, address, ptep);
-	pte_val(*ptep) = _PAGE_INVALID;
-
-	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_update_all(pte, pgste, mm);
-		pgste_set_unlock(ptep, pgste);
+	if (full) {
+		pte_t pte = *ptep;
+		*ptep = __pte(_PAGE_INVALID);
+		return pte;
 	}
-	return pte;
+	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
-				       unsigned long address, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
 {
-	pgste_t pgste;
 	pte_t pte = *ptep;
 
-	if (pte_write(pte)) {
-		if (mm_has_pgste(mm)) {
-			pgste = pgste_get_lock(ptep);
-			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
-		}
-
-		ptep_flush_lazy(mm, address, ptep);
-		pte = pte_wrprotect(pte);
-
-		if (mm_has_pgste(mm)) {
-			pgste = pgste_set_pte(ptep, pgste, pte);
-			pgste_set_unlock(ptep, pgste);
-		} else
-			*ptep = pte;
-	}
-	return pte;
+	if (pte_write(pte))
+		ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
 }
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
-					unsigned long address, pte_t *ptep,
+					unsigned long addr, pte_t *ptep,
 					pte_t entry, int dirty)
 {
-	pgste_t pgste;
-	pte_t oldpte;
-
-	oldpte = *ptep;
-	if (pte_same(oldpte, entry))
+	if (pte_same(*ptep, entry))
 		return 0;
-	if (mm_has_pgste(vma->vm_mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
-	}
+	ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+	return 1;
+}
 
-	ptep_flush_direct(vma->vm_mm, address, ptep);
+void set_pte_pgste_at(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
-	if (mm_has_pgste(vma->vm_mm)) {
-		if (pte_val(oldpte) & _PAGE_INVALID)
-			pgste_set_key(ptep, pgste, entry, vma->vm_mm);
-		pgste = pgste_set_pte(ptep, pgste, entry);
-		pgste_set_unlock(ptep, pgste);
-	} else
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	if (mm_has_pgste(mm))
+		set_pte_pgste_at(mm, addr, ptep, entry);
+	else
 		*ptep = entry;
-	return 1;
 }
 
 /*
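Editorial note: the comment block moved above the ptep_xchg declarations refers to the generic mm pattern sketched below; this is an illustrative sketch only (the variable names and the pte_mkold() step are placeholders, not part of this patch):

	pte_t pte;

	pte = ptep_get_and_clear(mm, addr, ptep);	/* on s390 this already flushes the TLB */
	pte = pte_mkold(pte);				/* some modification of the pte */
	set_pte_at(mm, addr, ptep, pte);
	flush_tlb_range(vma, start, end);		/* hence a nop on s390 */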
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4af21c771f9b..616e0a16ee88 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -280,7 +280,7 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 		address = gfn_to_hva_memslot(memslot, cur_gfn);
 
-		if (gmap_test_and_clear_dirty(address, gmap))
+		if (pgste_test_and_clear_dirty(gmap->mm, address))
 			mark_page_dirty(kvm, cur_gfn);
 	}
 	up_read(&gmap->mm->mmap_sem);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6acd7174fe75..30033aad17da 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -772,7 +772,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
 EXPORT_SYMBOL_GPL(gmap_ipte_notify);
 
 /**
- * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * ptep_ipte_notify - call all invalidation callbacks for a specific pte.
  * @mm: pointer to the process mm_struct
  * @addr: virtual address in the process address space
  * @pte: pointer to the page table entry
@@ -780,7 +780,7 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
  * This function is assumed to be called with the page table lock held
  * for the pte to notify.
  */
-void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+void ptep_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 {
 	unsigned long offset, gaddr;
 	unsigned long *table;
@@ -801,7 +801,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 	}
 	spin_unlock(&gmap_notifier_lock);
 }
-EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+EXPORT_SYMBOL_GPL(ptep_ipte_notify);
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
@@ -1158,6 +1158,266 @@ static inline void thp_split_mm(struct mm_struct *mm)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	int active, count;
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_ipte_local(addr, ptep);
+	else
+		__ptep_ipte(addr, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
+}
+
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep)
+{
+	int active, count;
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if ((count & 0xffff) <= active) {
+		pte_val(*ptep) |= _PAGE_INVALID;
+		mm->context.flush_mm = 1;
+	} else
+		__ptep_ipte(addr, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);
+	return old;
+}
+
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+				       struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long address, bits, skey;
+
+	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+		return pgste;
+	address = pte_val(pte) & PAGE_MASK;
+	skey = (unsigned long) page_get_storage_key(address);
+	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+	/* Transfer page changed & referenced bit to guest bits in pgste */
+	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
+	/* Copy page access key and fetch protection bit to pgste */
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+	return pgste;
+
+}
+
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long address;
+	unsigned long nkey;
+
+	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+		return;
+	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+	address = pte_val(entry) & PAGE_MASK;
+	/*
+	 * Set page access key and fetch protection bit from pgste.
+	 * The guest C/R information is still in the PGSTE, set real
+	 * key C/R to 0.
+	 */
+	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+	page_set_storage_key(address, nkey, 0);
+#endif
+}
+
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+#ifdef CONFIG_PGSTE
+	if ((pte_val(entry) & _PAGE_PRESENT) &&
+	    (pte_val(entry) & _PAGE_WRITE) &&
+	    !(pte_val(entry) & _PAGE_INVALID)) {
+		if (!MACHINE_HAS_ESOP) {
+			/*
+			 * Without enhanced suppression-on-protection force
+			 * the dirty bit on for all writable ptes.
+			 */
+			pte_val(entry) |= _PAGE_DIRTY;
+			pte_val(entry) &= ~_PAGE_PROTECT;
+		}
+		if (!(pte_val(entry) & _PAGE_PROTECT))
+			/* This pte allows write access, set user-dirty */
+			pgste_val(pgste) |= PGSTE_UC_BIT;
+	}
+#endif
+	*ptep = entry;
+	return pgste;
+}
+
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	if (pgste_val(pgste) & PGSTE_IN_BIT) {
+		pgste_val(pgste) &= ~PGSTE_IN_BIT;
+		ptep_ipte_notify(mm, addr, ptep);
+	}
+#endif
+	return pgste;
+}
+
+#ifdef CONFIG_PGSTE
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool pgste_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	pte_t pte;
+	bool dirty;
+
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep))
+		return false;
+
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+		__ptep_ipte(addr, ptep);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte_val(pte) |= _PAGE_PROTECT;
+		else
+			pte_val(pte) |= _PAGE_INVALID;
+		*ptep = pte;
+	}
+	pgste_set_unlock(ptep, pgste);
+
+	spin_unlock(ptl);
+	return dirty;
+}
+EXPORT_SYMBOL_GPL(pgste_test_and_clear_dirty);
+
+void set_pte_pgste_at(struct mm_struct *mm, unsigned long addr,
+		      pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	/* the mm_has_pgste() check is done in set_pte_at() */
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste_set_key(ptep, pgste, entry, mm);
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+}
+EXPORT_SYMBOL(set_pte_pgste_at);
+#endif
+
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	pgste_t pgste = __pgste(0);
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+	}
+	return pgste;
+}
+
+static inline void ptep_xchg_commit(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep,
+				    pgste_t pgste, pte_t old, pte_t new)
+{
+	if (mm_has_pgste(mm)) {
+		if (pte_val(old) & _PAGE_INVALID)
+			pgste_set_key(ptep, pgste, new, mm);
+		if (pte_val(new) & _PAGE_INVALID) {
+			pgste = pgste_update_all(old, pgste, mm);
+			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+			    _PGSTE_GPS_USAGE_UNUSED)
+				pte_val(old) |= _PAGE_UNUSED;
+		}
+		pgste = pgste_set_pte(ptep, pgste, new);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		*ptep = new;
+	}
+}
+
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pte_t *ptep, pte_t new)
+{
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_direct(mm, addr, ptep);
+	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	return old;
+}
+EXPORT_SYMBOL(ptep_xchg_direct);
+
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t new)
+{
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_lazy(mm, addr, ptep);
+	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	return old;
+}
+EXPORT_SYMBOL(ptep_xchg_lazy);
+
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t old;
+
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	old = ptep_flush_lazy(mm, addr, ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(old, pgste, mm);
+		pgste_set(ptep, pgste);
+	}
+	return old;
+}
+EXPORT_SYMBOL(ptep_modify_prot_start);
+
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep, pte_t pte)
+{
+	pgste_t pgste;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get(ptep);
+		pgste_set_key(ptep, pgste, pte, mm);
+		pgste = pgste_set_pte(ptep, pgste, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		*ptep = pte;
+	}
+}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -1190,17 +1450,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr,
 	unsigned long ptev;
 	pgste_t pgste;
 
-	pgste = pgste_get_lock(pte);
 	/*
 	 * Remove all zero page mappings,
 	 * after establishing a policy to forbid zero page mappings
 	 * following faults for that page will get fresh anonymous pages
 	 */
-	if (is_zero_pfn(pte_pfn(*pte))) {
-		ptep_flush_direct(walk->mm, addr, pte);
-		pte_val(*pte) = _PAGE_INVALID;
-	}
+	if (is_zero_pfn(pte_pfn(*pte)))
+		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
 	/* Clear storage key */
+	pgste = pgste_get_lock(pte);
 	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
 			      PGSTE_GR_BIT | PGSTE_GC_BIT);
 	ptev = pte_val(*pte);
@@ -1266,27 +1524,6 @@ void s390_reset_cmma(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
 
-/*
- * Test and reset if a guest page is dirty
- */
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
-{
-	pte_t *pte;
-	spinlock_t *ptl;
-	bool dirty = false;
-
-	pte = get_locked_pte(gmap->mm, address, &ptl);
-	if (unlikely(!pte))
-		return false;
-
-	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
-		dirty = true;
-
-	spin_unlock(ptl);
-	return dirty;
-}
-EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
 			   pmd_t *pmdp)