author		Paolo Bonzini <pbonzini@redhat.com>	2014-08-26 08:31:44 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2014-08-26 08:31:44 -0400
commit		a7428c3ded540fb10dd32fd6c45352cb314f3569 (patch)
tree		002d2ad68a677f97ac57acd3eaaa62082d4d7b66
parent		54ad89b05ec49b90790de814647b244d3d2cc5ca (diff)
parent		f079e9521464aa522d56af2a58a1666ca126bf6f (diff)
Merge tag 'kvm-s390-next-20140825' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: Fixes and features for 3.18 part 1

1. The usual cleanups: get rid of duplicate code, use defines, factor out
   the sync_reg handling, add documentation for sync_regs, and improve
   error handling on interrupt injection.
2. Use KVM_REQ_TLB_FLUSH instead of open-coding TLB flushes.
3. Additional registers for the kvm_run sync regs. This is usually not
   needed in the fast path due to eventfd/irqfd, but kvm stat claims that
   we reduced the overhead of console output by ~50% on my system.
4. A rework of the gmap infrastructure. This is the 2nd step towards host
   large page support (after getting rid of the storage key dependency).
   We introduce two radix trees to store the guest-to-host and
   host-to-guest translations, which gets rid of most of the page-table
   walks in the gmap code. Only one walk is left, in __gmap_link; it is
   required to link the shadow page table to the process page table.
   Finally, this contains the plumbing to support gmap page tables with
   fewer than 5 levels. (A minimal sketch of the lookup idea follows
   right after this message.)
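For illustration only, a minimal stand-alone sketch of the idea behind the new guest_to_host lookup described in point 4. The helper names and the fixed-size array are placeholders for this example, not the kernel implementation, which keys a radix tree by the 1 MB segment index instead of walking a multi-level region/segment table:

#include <stdio.h>
#include <stdint.h>

#define SEG_SHIFT 20		/* 1 MB segments, as keyed by PMD_SHIFT in the gmap code */
#define NSEGS	  64		/* toy guest of 64 MB for this example */

/* toy stand-in for the guest_to_host radix tree: segment index -> host base */
static uint64_t guest_to_host[NSEGS];

/* map one 1 MB guest segment to a host address (cf. gmap_map_segment) */
static int map_segment(uint64_t gaddr, uint64_t vmaddr)
{
	uint64_t idx = gaddr >> SEG_SHIFT;

	if (idx >= NSEGS || (vmaddr & ((1UL << SEG_SHIFT) - 1)))
		return -1;
	guest_to_host[idx] = vmaddr;
	return 0;
}

/* translate a guest address to a host address (cf. __gmap_translate) */
static int64_t translate(uint64_t gaddr)
{
	uint64_t idx = gaddr >> SEG_SHIFT;

	if (idx >= NSEGS || !guest_to_host[idx])
		return -1;	/* -EFAULT in the kernel */
	return guest_to_host[idx] | (gaddr & ((1UL << SEG_SHIFT) - 1));
}

int main(void)
{
	map_segment(0x0, 0x7f0000000000UL);
	printf("guest 0x12345   -> host 0x%llx\n", (long long)translate(0x12345));
	printf("guest 0x2345678 -> %lld (unmapped)\n", (long long)translate(0x2345678));
	return 0;
}

The kernel additionally keeps a host_to_guest tree so a host address can be unlinked without any table walk, as seen in __gmap_unlink_by_vmaddr() further down.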
-rw-r--r--  Documentation/virtual/kvm/api.txt   |   4
-rw-r--r--  arch/s390/include/asm/pgalloc.h     |   8
-rw-r--r--  arch/s390/include/asm/pgtable.h     |  72
-rw-r--r--  arch/s390/include/asm/tlb.h         |   2
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h    |  10
-rw-r--r--  arch/s390/kvm/diag.c                |   8
-rw-r--r--  arch/s390/kvm/interrupt.c           | 145
-rw-r--r--  arch/s390/kvm/kvm-s390.c            |  99
-rw-r--r--  arch/s390/kvm/kvm-s390.h            |   5
-rw-r--r--  arch/s390/kvm/priv.c                |  11
-rw-r--r--  arch/s390/mm/fault.c                |  25
-rw-r--r--  arch/s390/mm/pgtable.c              | 695
-rw-r--r--  arch/s390/mm/vmem.c                 |   2
13 files changed, 501 insertions, 585 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index beae3fde075e..6485750ae08a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2861,6 +2861,10 @@ kvm_valid_regs for specific bits. These bits are architecture specific
 and usually define the validity of a groups of registers. (e.g. one bit
 for general purpose registers)
 
+Please note that the kernel is allowed to use the kvm_run structure as the
+primary storage for certain register types. Therefore, the kernel may use the
+values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
+
 };
 
 
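To make the documented contract above concrete, a hedged user-space sketch (s390-specific, simplified, not taken from any particular VMM): registers are read from kvm_run after KVM_RUN whenever the matching kvm_valid_regs bit is set, and kvm_dirty_regs is set before KVM_RUN for anything modified in place; because the kernel may treat kvm_run as primary storage, userspace must not assume a field is ignored just because its dirty bit is clear.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* sketch only: vcpu_fd is an open vcpu file descriptor, run its mmap'ed kvm_run */
static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	/* userspace changed the prefix register in place -> mark it dirty */
	run->s.regs.prefix = 0x10000;
	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;

	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return -1;

	/* only trust fields whose validity bit the kernel has set */
	if (run->kvm_valid_regs & KVM_SYNC_CRS) {
		/* control registers are current in run->s.regs.crs here */
	}
	return 0;
}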
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 9e18a61d3df3..d39a31c3cdf2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -18,9 +18,9 @@
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
-unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
+unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 
 void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
 			    bool init_skey);
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 /*
  * page table entry allocation/free routines.
  */
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
 
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b76317c1f3eb..0242588ded67 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -30,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
+#include <linux/radix-tree.h>
 #include <asm/bug.h>
 #include <asm/page.h>
 
@@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 
 /**
  * struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
  * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
- * @crst_list: list of all crst tables used in the guest address space
  * @pfault_enabled: defines if pfaults are applicable for the guest
  */
 struct gmap {
 	struct list_head list;
+	struct list_head crst_list;
 	struct mm_struct *mm;
+	struct radix_tree_root guest_to_host;
+	struct radix_tree_root host_to_guest;
+	spinlock_t guest_table_lock;
 	unsigned long *table;
 	unsigned long asce;
+	unsigned long asce_end;
 	void *private;
-	struct list_head crst_list;
 	bool pfault_enabled;
 };
 
 /**
- * struct gmap_rmap - reverse mapping for segment table entries
- * @gmap: pointer to the gmap_struct
- * @entry: pointer to a segment table entry
- * @vmaddr: virtual address in the guest address space
- */
-struct gmap_rmap {
-	struct list_head list;
-	struct gmap *gmap;
-	unsigned long *entry;
-	unsigned long vmaddr;
-};
-
-/**
- * struct gmap_pgtable - gmap information attached to a page table
- * @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries mapping a page table
- */
-struct gmap_pgtable {
-	unsigned long vmaddr;
-	struct list_head mapper;
-};
-
-/**
  * struct gmap_notifier - notify function block for page invalidation
  * @notifier_call: address of callback function
  */
 struct gmap_notifier {
 	struct list_head list;
-	void (*notifier_call)(struct gmap *gmap, unsigned long address);
+	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
 };
 
-struct gmap *gmap_alloc(struct mm_struct *mm);
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
 void gmap_free(struct gmap *gmap);
 void gmap_enable(struct gmap *gmap);
 void gmap_disable(struct gmap *gmap);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(unsigned long address, struct gmap *);
-unsigned long gmap_translate(unsigned long address, struct gmap *);
-unsigned long __gmap_fault(unsigned long address, struct gmap *);
-unsigned long gmap_fault(unsigned long address, struct gmap *);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
-void __gmap_zap(unsigned long address, struct gmap *);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
 bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
 
 
 void gmap_register_ipte_notifier(struct gmap_notifier *);
 void gmap_unregister_ipte_notifier(struct gmap_notifier *);
 int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
 
 static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
 					pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
 	if (pgste_val(pgste) & PGSTE_IN_BIT) {
 		pgste_val(pgste) &= ~PGSTE_IN_BIT;
-		gmap_do_ipte_notify(mm, ptep);
+		gmap_do_ipte_notify(mm, addr, ptep);
 	}
 #endif
 	return pgste;
@@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
 	pgste_val(pgste) &= ~PGSTE_UC_BIT;
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-		pgste = pgste_ipte_notify(mm, ptep, pgste);
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
 		__ptep_ipte(addr, ptep);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
@@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
 	}
 
 	pte = *ptep;
@@ -1178,7 +1164,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, ptep, pgste);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 
 	pte = *ptep;
@@ -1202,7 +1188,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste_ipte_notify(mm, ptep, pgste);
+		pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 
 	pte = *ptep;
@@ -1239,7 +1225,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 	}
 
 	pte = *ptep;
@@ -1273,7 +1259,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 
 	if (!full && mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, ptep, pgste);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 	}
 
 	pte = *ptep;
@@ -1298,7 +1284,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 	if (pte_write(pte)) {
 		if (mm_has_pgste(mm)) {
 			pgste = pgste_get_lock(ptep);
-			pgste = pgste_ipte_notify(mm, ptep, pgste);
+			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 		}
 
 		ptep_flush_lazy(mm, address, ptep);
@@ -1324,7 +1310,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 		return 0;
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 	}
 
 	ptep_flush_direct(vma->vm_mm, address, ptep);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index a25f09fbaf36..572c59949004 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				unsigned long address)
 {
-	page_table_free_rcu(tlb, (unsigned long *) pte);
+	page_table_free_rcu(tlb, (unsigned long *) pte, address);
 }
 
 /*
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 0fc26430a1e5..48eda3ab4944 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_GPRS   (1UL << 1)
 #define KVM_SYNC_ACRS   (1UL << 2)
 #define KVM_SYNC_CRS    (1UL << 3)
+#define KVM_SYNC_ARCH0  (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
 	__u64 gprs[16];	/* general purpose registers */
 	__u32 acrs[16];	/* access registers */
 	__u64 crs[16];	/* control registers */
+	__u64 todpr;	/* tod programmable register [ARCH0] */
+	__u64 cputm;	/* cpu timer [ARCH0] */
+	__u64 ckc;	/* clock comparator [ARCH0] */
+	__u64 pp;	/* program parameter [ARCH0] */
+	__u64 gbea;	/* guest breaking-event address [ARCH0] */
+	__u64 pft;	/* pfault token [PFAULT] */
+	__u64 pfs;	/* pfault select [PFAULT] */
+	__u64 pfc;	/* pfault compare [PFAULT] */
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 59bd8f991b98..b374b6cb7785 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -37,13 +37,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	/* we checked for start > end above */
 	if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
-		gmap_discard(start, end, vcpu->arch.gmap);
+		gmap_discard(vcpu->arch.gmap, start, end);
 	} else {
 		if (start < prefix)
-			gmap_discard(start, prefix, vcpu->arch.gmap);
+			gmap_discard(vcpu->arch.gmap, start, prefix);
 		if (end >= prefix)
-			gmap_discard(prefix + 2 * PAGE_SIZE,
-				     end, vcpu->arch.gmap);
+			gmap_discard(vcpu->arch.gmap,
+				     prefix + 2 * PAGE_SIZE, end);
 	}
 	return 0;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4c819bfc193..60a5cf40d49a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -26,8 +26,9 @@
26#define IOINT_SSID_MASK 0x00030000 26#define IOINT_SSID_MASK 0x00030000
27#define IOINT_CSSID_MASK 0x03fc0000 27#define IOINT_CSSID_MASK 0x03fc0000
28#define IOINT_AI_MASK 0x04000000 28#define IOINT_AI_MASK 0x04000000
29#define PFAULT_INIT 0x0600
29 30
30static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); 31static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
31 32
32static int is_ioint(u64 type) 33static int is_ioint(u64 type)
33{ 34{
@@ -205,11 +206,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
205 } 206 }
206} 207}
207 208
209static u16 get_ilc(struct kvm_vcpu *vcpu)
210{
211 const unsigned short table[] = { 2, 4, 4, 6 };
212
213 switch (vcpu->arch.sie_block->icptcode) {
214 case ICPT_INST:
215 case ICPT_INSTPROGI:
216 case ICPT_OPEREXC:
217 case ICPT_PARTEXEC:
218 case ICPT_IOINST:
219 /* last instruction only stored for these icptcodes */
220 return table[vcpu->arch.sie_block->ipa >> 14];
221 case ICPT_PROGI:
222 return vcpu->arch.sie_block->pgmilc;
223 default:
224 return 0;
225 }
226}
227
208static int __deliver_prog_irq(struct kvm_vcpu *vcpu, 228static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
209 struct kvm_s390_pgm_info *pgm_info) 229 struct kvm_s390_pgm_info *pgm_info)
210{ 230{
211 const unsigned short table[] = { 2, 4, 4, 6 };
212 int rc = 0; 231 int rc = 0;
232 u16 ilc = get_ilc(vcpu);
213 233
214 switch (pgm_info->code & ~PGM_PER) { 234 switch (pgm_info->code & ~PGM_PER) {
215 case PGM_AFX_TRANSLATION: 235 case PGM_AFX_TRANSLATION:
@@ -276,25 +296,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
276 (u8 *) __LC_PER_ACCESS_ID); 296 (u8 *) __LC_PER_ACCESS_ID);
277 } 297 }
278 298
279 switch (vcpu->arch.sie_block->icptcode) { 299 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
280 case ICPT_INST:
281 case ICPT_INSTPROGI:
282 case ICPT_OPEREXC:
283 case ICPT_PARTEXEC:
284 case ICPT_IOINST:
285 /* last instruction only stored for these icptcodes */
286 rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
287 (u16 *) __LC_PGM_ILC);
288 break;
289 case ICPT_PROGI:
290 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
291 (u16 *) __LC_PGM_ILC);
292 break;
293 default:
294 rc |= put_guest_lc(vcpu, 0,
295 (u16 *) __LC_PGM_ILC);
296 }
297
298 rc |= put_guest_lc(vcpu, pgm_info->code, 300 rc |= put_guest_lc(vcpu, pgm_info->code,
299 (u16 *)__LC_PGM_INT_CODE); 301 (u16 *)__LC_PGM_INT_CODE);
300 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, 302 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -305,7 +307,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
305 return rc; 307 return rc;
306} 308}
307 309
308static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, 310static int __do_deliver_interrupt(struct kvm_vcpu *vcpu,
309 struct kvm_s390_interrupt_info *inti) 311 struct kvm_s390_interrupt_info *inti)
310{ 312{
311 const unsigned short table[] = { 2, 4, 4, 6 }; 313 const unsigned short table[] = { 2, 4, 4, 6 };
@@ -343,7 +345,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
343 case KVM_S390_INT_CLOCK_COMP: 345 case KVM_S390_INT_CLOCK_COMP:
344 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 346 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
345 inti->ext.ext_params, 0); 347 inti->ext.ext_params, 0);
346 deliver_ckc_interrupt(vcpu); 348 rc = deliver_ckc_interrupt(vcpu);
347 break; 349 break;
348 case KVM_S390_INT_CPU_TIMER: 350 case KVM_S390_INT_CPU_TIMER:
349 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 351 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
@@ -376,8 +378,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
376 case KVM_S390_INT_PFAULT_INIT: 378 case KVM_S390_INT_PFAULT_INIT:
377 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 379 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
378 inti->ext.ext_params2); 380 inti->ext.ext_params2);
379 rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); 381 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
380 rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); 382 (u16 *) __LC_EXT_INT_CODE);
383 rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
381 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, 384 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
382 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 385 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
383 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 386 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -501,14 +504,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
501 default: 504 default:
502 BUG(); 505 BUG();
503 } 506 }
504 if (rc) { 507
505 printk("kvm: The guest lowcore is not mapped during interrupt " 508 return rc;
506 "delivery, killing userspace\n");
507 do_exit(SIGKILL);
508 }
509} 509}
510 510
511static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) 511static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
512{ 512{
513 int rc; 513 int rc;
514 514
@@ -518,11 +518,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
518 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, 518 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
519 &vcpu->arch.sie_block->gpsw, 519 &vcpu->arch.sie_block->gpsw,
520 sizeof(psw_t)); 520 sizeof(psw_t));
521 if (rc) { 521 return rc;
522 printk("kvm: The guest lowcore is not mapped during interrupt "
523 "delivery, killing userspace\n");
524 do_exit(SIGKILL);
525 }
526} 522}
527 523
528/* Check whether SIGP interpretation facility has an external call pending */ 524/* Check whether SIGP interpretation facility has an external call pending */
@@ -661,12 +657,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
661 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); 657 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
662} 658}
663 659
664void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 660int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
665{ 661{
666 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 662 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
667 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 663 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
668 struct kvm_s390_interrupt_info *n, *inti = NULL; 664 struct kvm_s390_interrupt_info *n, *inti = NULL;
669 int deliver; 665 int deliver;
666 int rc = 0;
670 667
671 __reset_intercept_indicators(vcpu); 668 __reset_intercept_indicators(vcpu);
672 if (atomic_read(&li->active)) { 669 if (atomic_read(&li->active)) {
@@ -685,16 +682,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
685 atomic_set(&li->active, 0); 682 atomic_set(&li->active, 0);
686 spin_unlock(&li->lock); 683 spin_unlock(&li->lock);
687 if (deliver) { 684 if (deliver) {
688 __do_deliver_interrupt(vcpu, inti); 685 rc = __do_deliver_interrupt(vcpu, inti);
689 kfree(inti); 686 kfree(inti);
690 } 687 }
691 } while (deliver); 688 } while (!rc && deliver);
692 } 689 }
693 690
694 if (kvm_cpu_has_pending_timer(vcpu)) 691 if (!rc && kvm_cpu_has_pending_timer(vcpu))
695 deliver_ckc_interrupt(vcpu); 692 rc = deliver_ckc_interrupt(vcpu);
696 693
697 if (atomic_read(&fi->active)) { 694 if (!rc && atomic_read(&fi->active)) {
698 do { 695 do {
699 deliver = 0; 696 deliver = 0;
700 spin_lock(&fi->lock); 697 spin_lock(&fi->lock);
@@ -711,67 +708,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
711 atomic_set(&fi->active, 0); 708 atomic_set(&fi->active, 0);
712 spin_unlock(&fi->lock); 709 spin_unlock(&fi->lock);
713 if (deliver) { 710 if (deliver) {
714 __do_deliver_interrupt(vcpu, inti); 711 rc = __do_deliver_interrupt(vcpu, inti);
715 kfree(inti); 712 kfree(inti);
716 } 713 }
717 } while (deliver); 714 } while (!rc && deliver);
718 } 715 }
719}
720 716
721void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) 717 return rc;
722{
723 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
724 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
725 struct kvm_s390_interrupt_info *n, *inti = NULL;
726 int deliver;
727
728 __reset_intercept_indicators(vcpu);
729 if (atomic_read(&li->active)) {
730 do {
731 deliver = 0;
732 spin_lock(&li->lock);
733 list_for_each_entry_safe(inti, n, &li->list, list) {
734 if ((inti->type == KVM_S390_MCHK) &&
735 __interrupt_is_deliverable(vcpu, inti)) {
736 list_del(&inti->list);
737 deliver = 1;
738 break;
739 }
740 __set_intercept_indicator(vcpu, inti);
741 }
742 if (list_empty(&li->list))
743 atomic_set(&li->active, 0);
744 spin_unlock(&li->lock);
745 if (deliver) {
746 __do_deliver_interrupt(vcpu, inti);
747 kfree(inti);
748 }
749 } while (deliver);
750 }
751
752 if (atomic_read(&fi->active)) {
753 do {
754 deliver = 0;
755 spin_lock(&fi->lock);
756 list_for_each_entry_safe(inti, n, &fi->list, list) {
757 if ((inti->type == KVM_S390_MCHK) &&
758 __interrupt_is_deliverable(vcpu, inti)) {
759 list_del(&inti->list);
760 fi->irq_count--;
761 deliver = 1;
762 break;
763 }
764 __set_intercept_indicator(vcpu, inti);
765 }
766 if (list_empty(&fi->list))
767 atomic_set(&fi->active, 0);
768 spin_unlock(&fi->lock);
769 if (deliver) {
770 __do_deliver_interrupt(vcpu, inti);
771 kfree(inti);
772 }
773 } while (deliver);
774 }
775} 718}
776 719
777int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) 720int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
@@ -1048,7 +991,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1048 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, 991 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
1049 s390int->parm64, 2); 992 s390int->parm64, 2);
1050 993
1051 mutex_lock(&vcpu->kvm->lock);
1052 li = &vcpu->arch.local_int; 994 li = &vcpu->arch.local_int;
1053 spin_lock(&li->lock); 995 spin_lock(&li->lock);
1054 if (inti->type == KVM_S390_PROGRAM_INT) 996 if (inti->type == KVM_S390_PROGRAM_INT)
@@ -1060,7 +1002,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
1060 li->action_bits |= ACTION_STOP_ON_STOP; 1002 li->action_bits |= ACTION_STOP_ON_STOP;
1061 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1003 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1062 spin_unlock(&li->lock); 1004 spin_unlock(&li->lock);
1063 mutex_unlock(&vcpu->kvm->lock);
1064 kvm_s390_vcpu_wakeup(vcpu); 1005 kvm_s390_vcpu_wakeup(vcpu);
1065 return 0; 1006 return 0;
1066} 1007}
@@ -1300,7 +1241,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
1300 } 1241 }
1301 INIT_LIST_HEAD(&map->list); 1242 INIT_LIST_HEAD(&map->list);
1302 map->guest_addr = addr; 1243 map->guest_addr = addr;
1303 map->addr = gmap_translate(addr, kvm->arch.gmap); 1244 map->addr = gmap_translate(kvm->arch.gmap, addr);
1304 if (map->addr == -EFAULT) { 1245 if (map->addr == -EFAULT) {
1305 ret = -EFAULT; 1246 ret = -EFAULT;
1306 goto out; 1247 goto out;
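As a side note on the get_ilc() helper factored out above: on s390 the two most significant bits of an instruction's first halfword encode its length (2, 4, 4 or 6 bytes), which is why the table is indexed with ipa >> 14. A tiny stand-alone illustration (example opcodes chosen for this note, not from the patch):

#include <stdio.h>
#include <stdint.h>

/* s390 instruction length from the first two opcode bits (cf. get_ilc()) */
static unsigned int insn_length(uint16_t first_halfword)
{
	static const unsigned short table[] = { 2, 4, 4, 6 };

	return table[first_halfword >> 14];
}

int main(void)
{
	printf("0x0700 (BCR) -> %u bytes\n", insn_length(0x0700)); /* bits 00 -> 2 */
	printf("0xb204 (SCK) -> %u bytes\n", insn_length(0xb204)); /* bits 10 -> 4 */
	printf("0xe300 (LG)  -> %u bytes\n", insn_length(0xe300)); /* bits 11 -> 6 */
	return 0;
}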
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 197bec03d919..36209969bf98 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -451,7 +451,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
451 if (type & KVM_VM_S390_UCONTROL) { 451 if (type & KVM_VM_S390_UCONTROL) {
452 kvm->arch.gmap = NULL; 452 kvm->arch.gmap = NULL;
453 } else { 453 } else {
454 kvm->arch.gmap = gmap_alloc(current->mm); 454 kvm->arch.gmap = gmap_alloc(current->mm, -1UL);
455 if (!kvm->arch.gmap) 455 if (!kvm->arch.gmap)
456 goto out_nogmap; 456 goto out_nogmap;
457 kvm->arch.gmap->private = kvm; 457 kvm->arch.gmap->private = kvm;
@@ -535,7 +535,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
535 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 535 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
536 kvm_clear_async_pf_completion_queue(vcpu); 536 kvm_clear_async_pf_completion_queue(vcpu);
537 if (kvm_is_ucontrol(vcpu->kvm)) { 537 if (kvm_is_ucontrol(vcpu->kvm)) {
538 vcpu->arch.gmap = gmap_alloc(current->mm); 538 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
539 if (!vcpu->arch.gmap) 539 if (!vcpu->arch.gmap)
540 return -ENOMEM; 540 return -ENOMEM;
541 vcpu->arch.gmap->private = vcpu->kvm; 541 vcpu->arch.gmap->private = vcpu->kvm;
@@ -546,7 +546,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
546 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 546 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
547 KVM_SYNC_GPRS | 547 KVM_SYNC_GPRS |
548 KVM_SYNC_ACRS | 548 KVM_SYNC_ACRS |
549 KVM_SYNC_CRS; 549 KVM_SYNC_CRS |
550 KVM_SYNC_ARCH0 |
551 KVM_SYNC_PFAULT;
550 return 0; 552 return 0;
551} 553}
552 554
@@ -1053,6 +1055,11 @@ retry:
1053 goto retry; 1055 goto retry;
1054 } 1056 }
1055 1057
1058 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1059 vcpu->arch.sie_block->ihcpu = 0xffff;
1060 goto retry;
1061 }
1062
1056 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 1063 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1057 if (!ibs_enabled(vcpu)) { 1064 if (!ibs_enabled(vcpu)) {
1058 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 1065 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
@@ -1089,18 +1096,8 @@ retry:
1089 */ 1096 */
1090long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 1097long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1091{ 1098{
1092 struct mm_struct *mm = current->mm; 1099 return gmap_fault(vcpu->arch.gmap, gpa,
1093 hva_t hva; 1100 writable ? FAULT_FLAG_WRITE : 0);
1094 long rc;
1095
1096 hva = gmap_fault(gpa, vcpu->arch.gmap);
1097 if (IS_ERR_VALUE(hva))
1098 return (long)hva;
1099 down_read(&mm->mmap_sem);
1100 rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
1101 up_read(&mm->mmap_sem);
1102
1103 return rc < 0 ? rc : 0;
1104} 1101}
1105 1102
1106static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 1103static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1195,8 +1192,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1195 if (test_cpu_flag(CIF_MCCK_PENDING)) 1192 if (test_cpu_flag(CIF_MCCK_PENDING))
1196 s390_handle_mcck(); 1193 s390_handle_mcck();
1197 1194
1198 if (!kvm_is_ucontrol(vcpu->kvm)) 1195 if (!kvm_is_ucontrol(vcpu->kvm)) {
1199 kvm_s390_deliver_pending_interrupts(vcpu); 1196 rc = kvm_s390_deliver_pending_interrupts(vcpu);
1197 if (rc)
1198 return rc;
1199 }
1200 1200
1201 rc = kvm_s390_handle_requests(vcpu); 1201 rc = kvm_s390_handle_requests(vcpu);
1202 if (rc) 1202 if (rc)
@@ -1300,6 +1300,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
1300 return rc; 1300 return rc;
1301} 1301}
1302 1302
1303static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1304{
1305 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1306 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1307 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
1308 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1309 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1310 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1311 /* some control register changes require a tlb flush */
1312 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1313 }
1314 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
1315 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
1316 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
1317 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
1318 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
1319 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
1320 }
1321 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
1322 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
1323 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
1324 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
1325 }
1326 kvm_run->kvm_dirty_regs = 0;
1327}
1328
1329static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1330{
1331 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1332 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1333 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1334 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1335 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
1336 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
1337 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
1338 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
1339 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
1340 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
1341 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
1342 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
1343}
1344
1303int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1345int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1304{ 1346{
1305 int rc; 1347 int rc;
@@ -1321,17 +1363,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1321 return -EINVAL; 1363 return -EINVAL;
1322 } 1364 }
1323 1365
1324 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 1366 sync_regs(vcpu, kvm_run);
1325 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1326 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
1327 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
1328 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1329 }
1330 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1331 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
1332 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1333 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1334 }
1335 1367
1336 might_fault(); 1368 might_fault();
1337 rc = __vcpu_run(vcpu); 1369 rc = __vcpu_run(vcpu);
@@ -1361,10 +1393,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1361 rc = 0; 1393 rc = 0;
1362 } 1394 }
1363 1395
1364 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 1396 store_regs(vcpu, kvm_run);
1365 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1366 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1367 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1368 1397
1369 if (vcpu->sigset_active) 1398 if (vcpu->sigset_active)
1370 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1399 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1493,7 +1522,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
1493 * Another VCPU might have used IBS while we were offline. 1522 * Another VCPU might have used IBS while we were offline.
1494 * Let's play safe and flush the VCPU at startup. 1523 * Let's play safe and flush the VCPU at startup.
1495 */ 1524 */
1496 vcpu->arch.sie_block->ihcpu = 0xffff; 1525 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1497 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 1526 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
1498 return; 1527 return;
1499} 1528}
@@ -1648,9 +1677,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1648 } 1677 }
1649#endif 1678#endif
1650 case KVM_S390_VCPU_FAULT: { 1679 case KVM_S390_VCPU_FAULT: {
1651 r = gmap_fault(arg, vcpu->arch.gmap); 1680 r = gmap_fault(vcpu->arch.gmap, arg, 0);
1652 if (!IS_ERR_VALUE(r))
1653 r = 0;
1654 break; 1681 break;
1655 } 1682 }
1656 case KVM_ENABLE_CAP: 1683 case KVM_ENABLE_CAP:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3862fa2cefe0..99abcb56e478 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
 static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
 	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
-	vcpu->arch.sie_block->ihcpu  = 0xffff;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
@@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
-void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
-void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f89c1cd67751..72bb2dd8b9cd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void handle_new_psw(struct kvm_vcpu *vcpu)
-{
-	/* Check whether the new psw is enabled for machine checks. */
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
-		kvm_s390_deliver_pending_machine_checks(vcpu);
-}
-
 #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
 #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
 #define PSW_ADDR_24 0x0000000000ffffffUL
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
 	if (!is_valid_psw(gpsw))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	handle_new_psw(vcpu);
 	return 0;
 }
 
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gpsw = new_psw;
 	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	handle_new_psw(vcpu);
 	return 0;
 }
 
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 			/* invalid entry */
 			break;
 		/* try to free backing */
-		__gmap_zap(cbrle, gmap);
+		__gmap_zap(gmap, cbrle);
 	}
 	up_read(&gmap->mm->mmap_sem);
 	if (i < entries)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3f3b35403d0a..a2b81d6ce8a5 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	down_read(&mm->mmap_sem);
 
 #ifdef CONFIG_PGSTE
-	gmap = (struct gmap *)
-		((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+	gmap = (current->flags & PF_VCPU) ?
+		(struct gmap *) S390_lowcore.gmap : NULL;
 	if (gmap) {
-		address = __gmap_fault(address, gmap);
+		current->thread.gmap_addr = address;
+		address = __gmap_translate(gmap, address);
 		if (address == -EFAULT) {
 			fault = VM_FAULT_BADMAP;
 			goto out_up;
 		}
-		if (address == -ENOMEM) {
-			fault = VM_FAULT_OOM;
-			goto out_up;
-		}
 		if (gmap->pfault_enabled)
 			flags |= FAULT_FLAG_RETRY_NOWAIT;
 	}
@@ -530,6 +527,20 @@ retry:
 			goto retry;
 		}
 	}
+#ifdef CONFIG_PGSTE
+	if (gmap) {
+		address = __gmap_link(gmap, current->thread.gmap_addr,
+				      address);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (address == -ENOMEM) {
+			fault = VM_FAULT_OOM;
+			goto out_up;
+		}
+	}
+#endif
 	fault = 0;
 out_up:
 	up_read(&mm->mmap_sem);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5404a6261db9..665714b08c0d 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
145/** 145/**
146 * gmap_alloc - allocate a guest address space 146 * gmap_alloc - allocate a guest address space
147 * @mm: pointer to the parent mm_struct 147 * @mm: pointer to the parent mm_struct
148 * @limit: maximum size of the gmap address space
148 * 149 *
149 * Returns a guest address space structure. 150 * Returns a guest address space structure.
150 */ 151 */
151struct gmap *gmap_alloc(struct mm_struct *mm) 152struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
152{ 153{
153 struct gmap *gmap; 154 struct gmap *gmap;
154 struct page *page; 155 struct page *page;
155 unsigned long *table; 156 unsigned long *table;
156 157 unsigned long etype, atype;
158
159 if (limit < (1UL << 31)) {
160 limit = (1UL << 31) - 1;
161 atype = _ASCE_TYPE_SEGMENT;
162 etype = _SEGMENT_ENTRY_EMPTY;
163 } else if (limit < (1UL << 42)) {
164 limit = (1UL << 42) - 1;
165 atype = _ASCE_TYPE_REGION3;
166 etype = _REGION3_ENTRY_EMPTY;
167 } else if (limit < (1UL << 53)) {
168 limit = (1UL << 53) - 1;
169 atype = _ASCE_TYPE_REGION2;
170 etype = _REGION2_ENTRY_EMPTY;
171 } else {
172 limit = -1UL;
173 atype = _ASCE_TYPE_REGION1;
174 etype = _REGION1_ENTRY_EMPTY;
175 }
157 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); 176 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
158 if (!gmap) 177 if (!gmap)
159 goto out; 178 goto out;
160 INIT_LIST_HEAD(&gmap->crst_list); 179 INIT_LIST_HEAD(&gmap->crst_list);
180 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
181 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
182 spin_lock_init(&gmap->guest_table_lock);
161 gmap->mm = mm; 183 gmap->mm = mm;
162 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 184 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
163 if (!page) 185 if (!page)
164 goto out_free; 186 goto out_free;
187 page->index = 0;
165 list_add(&page->lru, &gmap->crst_list); 188 list_add(&page->lru, &gmap->crst_list);
166 table = (unsigned long *) page_to_phys(page); 189 table = (unsigned long *) page_to_phys(page);
167 crst_table_init(table, _REGION1_ENTRY_EMPTY); 190 crst_table_init(table, etype);
168 gmap->table = table; 191 gmap->table = table;
169 gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | 192 gmap->asce = atype | _ASCE_TABLE_LENGTH |
170 _ASCE_USER_BITS | __pa(table); 193 _ASCE_USER_BITS | __pa(table);
194 gmap->asce_end = limit;
195 down_write(&mm->mmap_sem);
171 list_add(&gmap->list, &mm->context.gmap_list); 196 list_add(&gmap->list, &mm->context.gmap_list);
197 up_write(&mm->mmap_sem);
172 return gmap; 198 return gmap;
173 199
174out_free: 200out_free:
@@ -178,36 +204,38 @@ out:
178} 204}
179EXPORT_SYMBOL_GPL(gmap_alloc); 205EXPORT_SYMBOL_GPL(gmap_alloc);
180 206
181static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
182{
183 struct gmap_pgtable *mp;
184 struct gmap_rmap *rmap;
185 struct page *page;
186
187 if (*table & _SEGMENT_ENTRY_INVALID)
188 return 0;
189 page = pfn_to_page(*table >> PAGE_SHIFT);
190 mp = (struct gmap_pgtable *) page->index;
191 list_for_each_entry(rmap, &mp->mapper, list) {
192 if (rmap->entry != table)
193 continue;
194 list_del(&rmap->list);
195 kfree(rmap);
196 break;
197 }
198 *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
199 return 1;
200}
201
202static void gmap_flush_tlb(struct gmap *gmap) 207static void gmap_flush_tlb(struct gmap *gmap)
203{ 208{
204 if (MACHINE_HAS_IDTE) 209 if (MACHINE_HAS_IDTE)
205 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 210 __tlb_flush_asce(gmap->mm, gmap->asce);
206 _ASCE_TYPE_REGION1);
207 else 211 else
208 __tlb_flush_global(); 212 __tlb_flush_global();
209} 213}
210 214
215static void gmap_radix_tree_free(struct radix_tree_root *root)
216{
217 struct radix_tree_iter iter;
218 unsigned long indices[16];
219 unsigned long index;
220 void **slot;
221 int i, nr;
222
223 /* A radix tree is freed by deleting all of its entries */
224 index = 0;
225 do {
226 nr = 0;
227 radix_tree_for_each_slot(slot, root, &iter, index) {
228 indices[nr] = iter.index;
229 if (++nr == 16)
230 break;
231 }
232 for (i = 0; i < nr; i++) {
233 index = indices[i];
234 radix_tree_delete(root, index);
235 }
236 } while (nr > 0);
237}
238
211/** 239/**
212 * gmap_free - free a guest address space 240 * gmap_free - free a guest address space
213 * @gmap: pointer to the guest address space structure 241 * @gmap: pointer to the guest address space structure
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
215void gmap_free(struct gmap *gmap) 243void gmap_free(struct gmap *gmap)
216{ 244{
217 struct page *page, *next; 245 struct page *page, *next;
218 unsigned long *table;
219 int i;
220
221 246
222 /* Flush tlb. */ 247 /* Flush tlb. */
223 if (MACHINE_HAS_IDTE) 248 if (MACHINE_HAS_IDTE)
224 __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | 249 __tlb_flush_asce(gmap->mm, gmap->asce);
225 _ASCE_TYPE_REGION1);
226 else 250 else
227 __tlb_flush_global(); 251 __tlb_flush_global();
228 252
229 /* Free all segment & region tables. */ 253 /* Free all segment & region tables. */
230 down_read(&gmap->mm->mmap_sem); 254 list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
231 spin_lock(&gmap->mm->page_table_lock);
232 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
233 table = (unsigned long *) page_to_phys(page);
234 if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
235 /* Remove gmap rmap structures for segment table. */
236 for (i = 0; i < PTRS_PER_PMD; i++, table++)
237 gmap_unlink_segment(gmap, table);
238 __free_pages(page, ALLOC_ORDER); 255 __free_pages(page, ALLOC_ORDER);
239 } 256 gmap_radix_tree_free(&gmap->guest_to_host);
240 spin_unlock(&gmap->mm->page_table_lock); 257 gmap_radix_tree_free(&gmap->host_to_guest);
241 up_read(&gmap->mm->mmap_sem); 258 down_write(&gmap->mm->mmap_sem);
242 list_del(&gmap->list); 259 list_del(&gmap->list);
260 up_write(&gmap->mm->mmap_sem);
243 kfree(gmap); 261 kfree(gmap);
244} 262}
245EXPORT_SYMBOL_GPL(gmap_free); 263EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
267/* 285/*
268 * gmap_alloc_table is assumed to be called with mmap_sem held 286 * gmap_alloc_table is assumed to be called with mmap_sem held
269 */ 287 */
270static int gmap_alloc_table(struct gmap *gmap, 288static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
271 unsigned long *table, unsigned long init) 289 unsigned long init, unsigned long gaddr)
272 __releases(&gmap->mm->page_table_lock)
273 __acquires(&gmap->mm->page_table_lock)
274{ 290{
275 struct page *page; 291 struct page *page;
276 unsigned long *new; 292 unsigned long *new;
277 293
278 /* since we dont free the gmap table until gmap_free we can unlock */ 294 /* since we dont free the gmap table until gmap_free we can unlock */
279 spin_unlock(&gmap->mm->page_table_lock);
280 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 295 page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
281 spin_lock(&gmap->mm->page_table_lock);
282 if (!page) 296 if (!page)
283 return -ENOMEM; 297 return -ENOMEM;
284 new = (unsigned long *) page_to_phys(page); 298 new = (unsigned long *) page_to_phys(page);
285 crst_table_init(new, init); 299 crst_table_init(new, init);
300 spin_lock(&gmap->mm->page_table_lock);
286 if (*table & _REGION_ENTRY_INVALID) { 301 if (*table & _REGION_ENTRY_INVALID) {
287 list_add(&page->lru, &gmap->crst_list); 302 list_add(&page->lru, &gmap->crst_list);
288 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 303 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
289 (*table & _REGION_ENTRY_TYPE_MASK); 304 (*table & _REGION_ENTRY_TYPE_MASK);
290 } else 305 page->index = gaddr;
306 page = NULL;
307 }
308 spin_unlock(&gmap->mm->page_table_lock);
309 if (page)
291 __free_pages(page, ALLOC_ORDER); 310 __free_pages(page, ALLOC_ORDER);
292 return 0; 311 return 0;
293} 312}
294 313
295/** 314/**
315 * __gmap_segment_gaddr - find virtual address from segment pointer
316 * @entry: pointer to a segment table entry in the guest address space
317 *
318 * Returns the virtual address in the guest address space for the segment
319 */
320static unsigned long __gmap_segment_gaddr(unsigned long *entry)
321{
322 struct page *page;
323 unsigned long offset;
324
325 offset = (unsigned long) entry / sizeof(unsigned long);
326 offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
327 page = pmd_to_page((pmd_t *) entry);
328 return page->index + offset;
329}
330
331/**
332 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
333 * @gmap: pointer to the guest address space structure
334 * @vmaddr: address in the host process address space
335 *
336 * Returns 1 if a TLB flush is required
337 */
338static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
339{
340 unsigned long *entry;
341 int flush = 0;
342
343 spin_lock(&gmap->guest_table_lock);
344 entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
345 if (entry) {
346 flush = (*entry != _SEGMENT_ENTRY_INVALID);
347 *entry = _SEGMENT_ENTRY_INVALID;
348 }
349 spin_unlock(&gmap->guest_table_lock);
350 return flush;
351}
352
353/**
354 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
355 * @gmap: pointer to the guest address space structure
356 * @gaddr: address in the guest address space
357 *
358 * Returns 1 if a TLB flush is required
359 */
360static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
361{
362 unsigned long vmaddr;
363
364 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
365 gaddr >> PMD_SHIFT);
366 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
367}
368
369/**
296 * gmap_unmap_segment - unmap segment from the guest address space 370 * gmap_unmap_segment - unmap segment from the guest address space
297 * @gmap: pointer to the guest address space structure 371 * @gmap: pointer to the guest address space structure
298 * @addr: address in the guest address space 372 * @to: address in the guest address space
299 * @len: length of the memory area to unmap 373 * @len: length of the memory area to unmap
300 * 374 *
301 * Returns 0 if the unmap succeeded, -EINVAL if not. 375 * Returns 0 if the unmap succeeded, -EINVAL if not.
302 */ 376 */
303int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 377int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
304{ 378{
305 unsigned long *table;
306 unsigned long off; 379 unsigned long off;
307 int flush; 380 int flush;
308 381
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
312 return -EINVAL; 385 return -EINVAL;
313 386
314 flush = 0; 387 flush = 0;
315 down_read(&gmap->mm->mmap_sem); 388 down_write(&gmap->mm->mmap_sem);
316 spin_lock(&gmap->mm->page_table_lock); 389 for (off = 0; off < len; off += PMD_SIZE)
317 for (off = 0; off < len; off += PMD_SIZE) { 390 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
318 /* Walk the guest addr space page table */ 391 up_write(&gmap->mm->mmap_sem);
319 table = gmap->table + (((to + off) >> 53) & 0x7ff);
320 if (*table & _REGION_ENTRY_INVALID)
321 goto out;
322 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
323 table = table + (((to + off) >> 42) & 0x7ff);
324 if (*table & _REGION_ENTRY_INVALID)
325 goto out;
326 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
327 table = table + (((to + off) >> 31) & 0x7ff);
328 if (*table & _REGION_ENTRY_INVALID)
329 goto out;
330 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
331 table = table + (((to + off) >> 20) & 0x7ff);
332
333 /* Clear segment table entry in guest address space. */
334 flush |= gmap_unlink_segment(gmap, table);
335 *table = _SEGMENT_ENTRY_INVALID;
336 }
337out:
338 spin_unlock(&gmap->mm->page_table_lock);
339 up_read(&gmap->mm->mmap_sem);
340 if (flush) 392 if (flush)
341 gmap_flush_tlb(gmap); 393 gmap_flush_tlb(gmap);
342 return 0; 394 return 0;
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
348 * @gmap: pointer to the guest address space structure 400 * @gmap: pointer to the guest address space structure
349 * @from: source address in the parent address space 401 * @from: source address in the parent address space
350 * @to: target address in the guest address space 402 * @to: target address in the guest address space
403 * @len: length of the memory area to map
351 * 404 *
352 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 405 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
353 */ 406 */
354int gmap_map_segment(struct gmap *gmap, unsigned long from, 407int gmap_map_segment(struct gmap *gmap, unsigned long from,
355 unsigned long to, unsigned long len) 408 unsigned long to, unsigned long len)
356{ 409{
357 unsigned long *table;
358 unsigned long off; 410 unsigned long off;
359 int flush; 411 int flush;
360 412
361 if ((from | to | len) & (PMD_SIZE - 1)) 413 if ((from | to | len) & (PMD_SIZE - 1))
362 return -EINVAL; 414 return -EINVAL;
363 if (len == 0 || from + len > TASK_MAX_SIZE || 415 if (len == 0 || from + len < from || to + len < to ||
364 from + len < from || to + len < to) 416 from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
365 return -EINVAL; 417 return -EINVAL;
366 418
367 flush = 0; 419 flush = 0;
368 down_read(&gmap->mm->mmap_sem); 420 down_write(&gmap->mm->mmap_sem);
369 spin_lock(&gmap->mm->page_table_lock);
370 for (off = 0; off < len; off += PMD_SIZE) { 421 for (off = 0; off < len; off += PMD_SIZE) {
371 /* Walk the gmap address space page table */ 422 /* Remove old translation */
372 table = gmap->table + (((to + off) >> 53) & 0x7ff); 423 flush |= __gmap_unmap_by_gaddr(gmap, to + off);
373 if ((*table & _REGION_ENTRY_INVALID) && 424 /* Store new translation */
374 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) 425 if (radix_tree_insert(&gmap->guest_to_host,
375 goto out_unmap; 426 (to + off) >> PMD_SHIFT,
376 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 427 (void *) from + off))
377 table = table + (((to + off) >> 42) & 0x7ff); 428 break;
378 if ((*table & _REGION_ENTRY_INVALID) &&
379 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
380 goto out_unmap;
381 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
382 table = table + (((to + off) >> 31) & 0x7ff);
383 if ((*table & _REGION_ENTRY_INVALID) &&
384 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
385 goto out_unmap;
386 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
387 table = table + (((to + off) >> 20) & 0x7ff);
388
389 /* Store 'from' address in an invalid segment table entry. */
390 flush |= gmap_unlink_segment(gmap, table);
391 *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
392 _SEGMENT_ENTRY_PROTECT);
393 } 429 }
394 spin_unlock(&gmap->mm->page_table_lock); 430 up_write(&gmap->mm->mmap_sem);
395 up_read(&gmap->mm->mmap_sem);
396 if (flush) 431 if (flush)
397 gmap_flush_tlb(gmap); 432 gmap_flush_tlb(gmap);
398 return 0; 433 if (off >= len)
399 434 return 0;
400out_unmap:
401 spin_unlock(&gmap->mm->page_table_lock);
402 up_read(&gmap->mm->mmap_sem);
403 gmap_unmap_segment(gmap, to, len); 435 gmap_unmap_segment(gmap, to, len);
404 return -ENOMEM; 436 return -ENOMEM;
405} 437}
406EXPORT_SYMBOL_GPL(gmap_map_segment); 438EXPORT_SYMBOL_GPL(gmap_map_segment);
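With the radix trees, gmap_map_segment() no longer pre-builds region and segment tables; it only records, per 1 MB chunk, which host address backs which guest segment ((to + off) >> PMD_SHIFT as key, from + off as value) and leaves the actual linking to the fault path. A hedged usage sketch, assuming the gmap was created large enough for the target range (sketch_setup_guest_slot and the addresses are illustrative, not part of the patch):

static int sketch_setup_guest_slot(struct gmap *gmap)
{
	unsigned long from = 0x20000000UL;	/* host address, 1 MB aligned  */
	unsigned long to   = 0x00000000UL;	/* guest address, 1 MB aligned */
	unsigned long len  = 0x10000000UL;	/* 256 MB                      */

	/* Each PMD_SIZE chunk becomes one guest_to_host slot keyed by the
	 * guest segment index; nothing is linked until a fault occurs. */
	return gmap_map_segment(gmap, from, to, len);
}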
407 439
408static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
409{
410 unsigned long *table;
411
412 table = gmap->table + ((address >> 53) & 0x7ff);
413 if (unlikely(*table & _REGION_ENTRY_INVALID))
414 return ERR_PTR(-EFAULT);
415 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
416 table = table + ((address >> 42) & 0x7ff);
417 if (unlikely(*table & _REGION_ENTRY_INVALID))
418 return ERR_PTR(-EFAULT);
419 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
420 table = table + ((address >> 31) & 0x7ff);
421 if (unlikely(*table & _REGION_ENTRY_INVALID))
422 return ERR_PTR(-EFAULT);
423 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
424 table = table + ((address >> 20) & 0x7ff);
425 return table;
426}
427
428/** 440/**
429 * __gmap_translate - translate a guest address to a user space address 441 * __gmap_translate - translate a guest address to a user space address
430 * @address: guest address
431 * @gmap: pointer to guest mapping meta data structure 442 * @gmap: pointer to guest mapping meta data structure
443 * @gaddr: guest address
432 * 444 *
433 * Returns user space address which corresponds to the guest address or 445 * Returns user space address which corresponds to the guest address or
434 * -EFAULT if no such mapping exists. 446 * -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
436 * The mmap_sem of the mm that belongs to the address space must be held 448 * The mmap_sem of the mm that belongs to the address space must be held
437 * when this function gets called. 449 * when this function gets called.
438 */ 450 */
439unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) 451unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
440{ 452{
441 unsigned long *segment_ptr, vmaddr, segment; 453 unsigned long vmaddr;
442 struct gmap_pgtable *mp;
443 struct page *page;
444 454
445 current->thread.gmap_addr = address; 455 vmaddr = (unsigned long)
446 segment_ptr = gmap_table_walk(address, gmap); 456 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
447 if (IS_ERR(segment_ptr)) 457 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
448 return PTR_ERR(segment_ptr);
449 /* Convert the gmap address to an mm address. */
450 segment = *segment_ptr;
451 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
452 page = pfn_to_page(segment >> PAGE_SHIFT);
453 mp = (struct gmap_pgtable *) page->index;
454 return mp->vmaddr | (address & ~PMD_MASK);
455 } else if (segment & _SEGMENT_ENTRY_PROTECT) {
456 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
457 return vmaddr | (address & ~PMD_MASK);
458 }
459 return -EFAULT;
460} 458}
461EXPORT_SYMBOL_GPL(__gmap_translate); 459EXPORT_SYMBOL_GPL(__gmap_translate);
462 460
463/** 461/**
464 * gmap_translate - translate a guest address to a user space address 462 * gmap_translate - translate a guest address to a user space address
465 * @address: guest address
466 * @gmap: pointer to guest mapping meta data structure 463 * @gmap: pointer to guest mapping meta data structure
464 * @gaddr: guest address
467 * 465 *
468 * Returns user space address which corresponds to the guest address or 466 * Returns user space address which corresponds to the guest address or
469 * -EFAULT if no such mapping exists. 467 * -EFAULT if no such mapping exists.
470 * This function does not establish potentially missing page table entries. 468 * This function does not establish potentially missing page table entries.
471 */ 469 */
472unsigned long gmap_translate(unsigned long address, struct gmap *gmap) 470unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
473{ 471{
474 unsigned long rc; 472 unsigned long rc;
475 473
476 down_read(&gmap->mm->mmap_sem); 474 down_read(&gmap->mm->mmap_sem);
477 rc = __gmap_translate(address, gmap); 475 rc = __gmap_translate(gmap, gaddr);
478 up_read(&gmap->mm->mmap_sem); 476 up_read(&gmap->mm->mmap_sem);
479 return rc; 477 return rc;
480} 478}
481EXPORT_SYMBOL_GPL(gmap_translate); 479EXPORT_SYMBOL_GPL(gmap_translate);
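__gmap_translate() is now a single radix_tree_lookup() on the guest segment index plus the page offset within the segment; gmap_translate() only adds the mmap_sem protection around it. Callers keep treating the unsigned return value as an error code, exactly as the rest of this patch does. A hedged caller sketch (sketch_resolve is an illustrative name):

static int sketch_resolve(struct gmap *gmap, unsigned long gaddr,
			  unsigned long *vmaddr)
{
	unsigned long addr;

	addr = gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(addr))
		return (int) addr;	/* -EFAULT: no segment mapped here */
	*vmaddr = addr;
	return 0;
}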
482 480
483static int gmap_connect_pgtable(unsigned long address, unsigned long segment, 481/**
484 unsigned long *segment_ptr, struct gmap *gmap) 482 * gmap_unlink - disconnect a page table from the gmap shadow tables
 483 * @mm: pointer to the process mm_struct
484 * @table: pointer to the host page table
485 * @vmaddr: vm address associated with the host page table
486 */
487static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
488 unsigned long vmaddr)
489{
490 struct gmap *gmap;
491 int flush;
492
493 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
494 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
495 if (flush)
496 gmap_flush_tlb(gmap);
497 }
498}
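gmap_unlink() is the reverse-map replacement for the removed gmap_disconnect_pgtable()/rmap lists: when a host page table goes away, every gmap attached to the mm is checked through its host_to_guest tree. The worker __gmap_unlink_by_vmaddr() is defined earlier in this patch; a simplified sketch of what such a helper has to do, based on how __gmap_link() fills the tree below (the name sketch_unlink_by_vmaddr and the exact locking details are assumptions on my part):

static int sketch_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	spin_lock(&gmap->guest_table_lock);
	/* host_to_guest stores a pointer to the gmap segment table entry */
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;	/* caller flushes the gmap TLB if needed */
}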
499
500/**
501 * gmap_link - set up shadow page tables to connect a host to a guest address
502 * @gmap: pointer to guest mapping meta data structure
503 * @gaddr: guest address
504 * @vmaddr: vm address
505 *
506 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
507 * if the vm address is already mapped to a different guest segment.
508 * The mmap_sem of the mm that belongs to the address space must be held
509 * when this function gets called.
510 */
511int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
485{ 512{
486 unsigned long vmaddr;
487 struct vm_area_struct *vma;
488 struct gmap_pgtable *mp;
489 struct gmap_rmap *rmap;
490 struct mm_struct *mm; 513 struct mm_struct *mm;
491 struct page *page; 514 unsigned long *table;
515 spinlock_t *ptl;
492 pgd_t *pgd; 516 pgd_t *pgd;
493 pud_t *pud; 517 pud_t *pud;
494 pmd_t *pmd; 518 pmd_t *pmd;
519 int rc;
495 520
496 mm = gmap->mm; 521 /* Create higher level tables in the gmap page table */
497 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; 522 table = gmap->table;
498 vma = find_vma(mm, vmaddr); 523 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
499 if (!vma || vma->vm_start > vmaddr) 524 table += (gaddr >> 53) & 0x7ff;
500 return -EFAULT; 525 if ((*table & _REGION_ENTRY_INVALID) &&
526 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
527 gaddr & 0xffe0000000000000))
528 return -ENOMEM;
529 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
530 }
531 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
532 table += (gaddr >> 42) & 0x7ff;
533 if ((*table & _REGION_ENTRY_INVALID) &&
534 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
535 gaddr & 0xfffffc0000000000))
536 return -ENOMEM;
537 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
538 }
539 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
540 table += (gaddr >> 31) & 0x7ff;
541 if ((*table & _REGION_ENTRY_INVALID) &&
542 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
543 gaddr & 0xffffffff80000000))
544 return -ENOMEM;
545 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
546 }
547 table += (gaddr >> 20) & 0x7ff;
501 /* Walk the parent mm page table */ 548 /* Walk the parent mm page table */
549 mm = gmap->mm;
502 pgd = pgd_offset(mm, vmaddr); 550 pgd = pgd_offset(mm, vmaddr);
503 pud = pud_alloc(mm, pgd, vmaddr); 551 VM_BUG_ON(pgd_none(*pgd));
504 if (!pud) 552 pud = pud_offset(pgd, vmaddr);
505 return -ENOMEM; 553 VM_BUG_ON(pud_none(*pud));
506 pmd = pmd_alloc(mm, pud, vmaddr); 554 pmd = pmd_offset(pud, vmaddr);
507 if (!pmd) 555 VM_BUG_ON(pmd_none(*pmd));
508 return -ENOMEM;
509 if (!pmd_present(*pmd) &&
510 __pte_alloc(mm, vma, pmd, vmaddr))
511 return -ENOMEM;
512 /* large pmds cannot yet be handled */ 556 /* large pmds cannot yet be handled */
513 if (pmd_large(*pmd)) 557 if (pmd_large(*pmd))
514 return -EFAULT; 558 return -EFAULT;
515 /* pmd now points to a valid segment table entry. */
516 rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
517 if (!rmap)
518 return -ENOMEM;
519 /* Link gmap segment table entry location to page table. */ 559 /* Link gmap segment table entry location to page table. */
520 page = pmd_page(*pmd); 560 rc = radix_tree_preload(GFP_KERNEL);
521 mp = (struct gmap_pgtable *) page->index; 561 if (rc)
522 rmap->gmap = gmap; 562 return rc;
523 rmap->entry = segment_ptr; 563 ptl = pmd_lock(mm, pmd);
524 rmap->vmaddr = address & PMD_MASK; 564 spin_lock(&gmap->guest_table_lock);
525 spin_lock(&mm->page_table_lock); 565 if (*table == _SEGMENT_ENTRY_INVALID) {
526 if (*segment_ptr == segment) { 566 rc = radix_tree_insert(&gmap->host_to_guest,
527 list_add(&rmap->list, &mp->mapper); 567 vmaddr >> PMD_SHIFT, table);
528 /* Set gmap segment table entry to page table. */ 568 if (!rc)
529 *segment_ptr = pmd_val(*pmd) & PAGE_MASK; 569 *table = pmd_val(*pmd);
530 rmap = NULL; 570 } else
531 } 571 rc = 0;
532 spin_unlock(&mm->page_table_lock); 572 spin_unlock(&gmap->guest_table_lock);
533 kfree(rmap); 573 spin_unlock(ptl);
534 return 0; 574 radix_tree_preload_end();
535} 575 return rc;
536
537static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
538{
539 struct gmap_rmap *rmap, *next;
540 struct gmap_pgtable *mp;
541 struct page *page;
542 int flush;
543
544 flush = 0;
545 spin_lock(&mm->page_table_lock);
546 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
547 mp = (struct gmap_pgtable *) page->index;
548 list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
549 *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
550 _SEGMENT_ENTRY_PROTECT);
551 list_del(&rmap->list);
552 kfree(rmap);
553 flush = 1;
554 }
555 spin_unlock(&mm->page_table_lock);
556 if (flush)
557 __tlb_flush_global();
558} 576}
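__gmap_link() allocates only the region levels that the gmap's ASCE actually covers (the _ASCE_TYPE_MASK checks above), walks the host page table that the caller already made present, and records the connection in both directions: host_to_guest gets a pointer to the gmap segment table entry, and the entry itself receives pmd_val(*pmd). For reference, the guest-address split used by that walk as a small helper sketch (gaddr_index is an illustrative name; the shift values come straight from the code above):

/* Each region/segment table on s390 has 2048 eight-byte entries, so every
 * level consumes 11 bits of the guest address; segments cover 1 MB. */
static inline unsigned int gaddr_index(unsigned long gaddr, unsigned int shift)
{
	return (gaddr >> shift) & 0x7ff;	/* shift is 53, 42, 31 or 20 */
}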
559 577
560/* 578/**
561 * this function is assumed to be called with mmap_sem held 579 * gmap_fault - resolve a fault on a guest address
580 * @gmap: pointer to guest mapping meta data structure
581 * @gaddr: guest address
582 * @fault_flags: flags to pass down to handle_mm_fault()
583 *
584 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 585 * if no mapping exists for the guest address or the host fault cannot be resolved.
562 */ 586 */
563unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) 587int gmap_fault(struct gmap *gmap, unsigned long gaddr,
588 unsigned int fault_flags)
564{ 589{
565 unsigned long *segment_ptr, segment; 590 unsigned long vmaddr;
566 struct gmap_pgtable *mp;
567 struct page *page;
568 int rc; 591 int rc;
569 592
570 current->thread.gmap_addr = address;
571 segment_ptr = gmap_table_walk(address, gmap);
572 if (IS_ERR(segment_ptr))
573 return -EFAULT;
574 /* Convert the gmap address to an mm address. */
575 while (1) {
576 segment = *segment_ptr;
577 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
578 /* Page table is present */
579 page = pfn_to_page(segment >> PAGE_SHIFT);
580 mp = (struct gmap_pgtable *) page->index;
581 return mp->vmaddr | (address & ~PMD_MASK);
582 }
583 if (!(segment & _SEGMENT_ENTRY_PROTECT))
584 /* Nothing mapped in the gmap address space. */
585 break;
586 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
587 if (rc)
588 return rc;
589 }
590 return -EFAULT;
591}
592
593unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
594{
595 unsigned long rc;
596
597 down_read(&gmap->mm->mmap_sem); 593 down_read(&gmap->mm->mmap_sem);
598 rc = __gmap_fault(address, gmap); 594 vmaddr = __gmap_translate(gmap, gaddr);
595 if (IS_ERR_VALUE(vmaddr)) {
596 rc = vmaddr;
597 goto out_up;
598 }
599 if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
600 rc = -EFAULT;
601 goto out_up;
602 }
603 rc = __gmap_link(gmap, gaddr, vmaddr);
604out_up:
599 up_read(&gmap->mm->mmap_sem); 605 up_read(&gmap->mm->mmap_sem);
600
601 return rc; 606 return rc;
602} 607}
603EXPORT_SYMBOL_GPL(gmap_fault); 608EXPORT_SYMBOL_GPL(gmap_fault);
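gmap_fault() changes its contract: instead of returning a host address it now returns 0 or a negative error, and it does the whole job itself, translating the guest address, resolving the host fault with fixup_user_fault(), and wiring up the shadow tables with __gmap_link(). A hedged usage sketch (sketch_handle_guest_fault and the choice of FAULT_FLAG_WRITE are illustrative, not taken from the patch):

static int sketch_handle_guest_fault(struct gmap *gmap, unsigned long gaddr)
{
	/* 0 on success, -EFAULT if nothing is mapped at gaddr,
	 * -ENOMEM if the shadow tables could not be allocated */
	return gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
}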
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
617 free_swap_and_cache(entry); 622 free_swap_and_cache(entry);
618} 623}
619 624
620/** 625/*
621 * The mm->mmap_sem lock must be held 626 * this function is assumed to be called with mmap_sem held
622 */ 627 */
623static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) 628void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
624{ 629{
625 unsigned long ptev, pgstev; 630 unsigned long vmaddr, ptev, pgstev;
631 pte_t *ptep, pte;
626 spinlock_t *ptl; 632 spinlock_t *ptl;
627 pgste_t pgste; 633 pgste_t pgste;
628 pte_t *ptep, pte;
629 634
630 ptep = get_locked_pte(mm, address, &ptl); 635 /* Find the vm address for the guest address */
636 vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
637 gaddr >> PMD_SHIFT);
638 if (!vmaddr)
639 return;
640 vmaddr |= gaddr & ~PMD_MASK;
641 /* Get pointer to the page table entry */
642 ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
631 if (unlikely(!ptep)) 643 if (unlikely(!ptep))
632 return; 644 return;
633 pte = *ptep; 645 pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
639 ptev = pte_val(pte); 651 ptev = pte_val(pte);
640 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || 652 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
641 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { 653 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
642 gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); 654 gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
643 pte_clear(mm, address, ptep); 655 pte_clear(gmap->mm, vmaddr, ptep);
644 } 656 }
645 pgste_set_unlock(ptep, pgste); 657 pgste_set_unlock(ptep, pgste);
646out_pte: 658out_pte:
647 pte_unmap_unlock(*ptep, ptl); 659 pte_unmap_unlock(*ptep, ptl);
648} 660}
649
650/*
651 * this function is assumed to be called with mmap_sem held
652 */
653void __gmap_zap(unsigned long address, struct gmap *gmap)
654{
655 unsigned long *table, *segment_ptr;
656 unsigned long segment, pgstev, ptev;
657 struct gmap_pgtable *mp;
658 struct page *page;
659
660 segment_ptr = gmap_table_walk(address, gmap);
661 if (IS_ERR(segment_ptr))
662 return;
663 segment = *segment_ptr;
664 if (segment & _SEGMENT_ENTRY_INVALID)
665 return;
666 page = pfn_to_page(segment >> PAGE_SHIFT);
667 mp = (struct gmap_pgtable *) page->index;
668 address = mp->vmaddr | (address & ~PMD_MASK);
669 /* Page table is present */
670 table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
671 table = table + ((address >> 12) & 0xff);
672 pgstev = table[PTRS_PER_PTE];
673 ptev = table[0];
674 /* quick check, checked again with locks held */
675 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
676 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
677 gmap_zap_unused(gmap->mm, address);
678}
679EXPORT_SYMBOL_GPL(__gmap_zap); 661EXPORT_SYMBOL_GPL(__gmap_zap);
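__gmap_zap() keeps its job of dropping a page whose guest storage state marks it unused, but it now resolves the host address through the guest_to_host tree instead of the old gmap_pgtable back-pointer. It still expects mmap_sem to be held, so a hedged caller sketch looks like this (sketch_zap_one is an illustrative name):

static void sketch_zap_one(struct gmap *gmap, unsigned long gaddr)
{
	down_read(&gmap->mm->mmap_sem);
	__gmap_zap(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
}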
680 662
681void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) 663void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
682{ 664{
683 665 unsigned long gaddr, vmaddr, size;
684 unsigned long *table, address, size;
685 struct vm_area_struct *vma; 666 struct vm_area_struct *vma;
686 struct gmap_pgtable *mp;
687 struct page *page;
688 667
689 down_read(&gmap->mm->mmap_sem); 668 down_read(&gmap->mm->mmap_sem);
690 address = from; 669 for (gaddr = from; gaddr < to;
691 while (address < to) { 670 gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
692 /* Walk the gmap address space page table */ 671 /* Find the vm address for the guest address */
693 table = gmap->table + ((address >> 53) & 0x7ff); 672 vmaddr = (unsigned long)
694 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 673 radix_tree_lookup(&gmap->guest_to_host,
695 address = (address + PMD_SIZE) & PMD_MASK; 674 gaddr >> PMD_SHIFT);
696 continue; 675 if (!vmaddr)
697 }
698 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
699 table = table + ((address >> 42) & 0x7ff);
700 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
701 address = (address + PMD_SIZE) & PMD_MASK;
702 continue; 676 continue;
703 } 677 vmaddr |= gaddr & ~PMD_MASK;
704 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 678 /* Find vma in the parent mm */
705 table = table + ((address >> 31) & 0x7ff); 679 vma = find_vma(gmap->mm, vmaddr);
706 if (unlikely(*table & _REGION_ENTRY_INVALID)) { 680 size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
707 address = (address + PMD_SIZE) & PMD_MASK; 681 zap_page_range(vma, vmaddr, size, NULL);
708 continue;
709 }
710 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
711 table = table + ((address >> 20) & 0x7ff);
712 if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
713 address = (address + PMD_SIZE) & PMD_MASK;
714 continue;
715 }
716 page = pfn_to_page(*table >> PAGE_SHIFT);
717 mp = (struct gmap_pgtable *) page->index;
718 vma = find_vma(gmap->mm, mp->vmaddr);
719 size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
720 zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
721 size, NULL);
722 address = (address + PMD_SIZE) & PMD_MASK;
723 } 682 }
724 up_read(&gmap->mm->mmap_sem); 683 up_read(&gmap->mm->mmap_sem);
725} 684}
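gmap_discard() now steps through the guest range in PMD_SIZE increments, looks each segment up in guest_to_host, and zaps the matching host range; segments without a host mapping are simply skipped instead of being walked through the old table hierarchy. A hedged usage sketch (the range values are illustrative; in the kernel the typical caller is the diag 0x10 "release pages" handler):

static void sketch_release_guest_range(struct gmap *gmap)
{
	/* drop the backing of guest pages from 1 MB up to 4 MB */
	gmap_discard(gmap, 0x100000UL, 0x400000UL);
}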
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
755/** 714/**
756 * gmap_ipte_notify - mark a range of ptes for invalidation notification 715 * gmap_ipte_notify - mark a range of ptes for invalidation notification
757 * @gmap: pointer to guest mapping meta data structure 716 * @gmap: pointer to guest mapping meta data structure
758 * @start: virtual address in the guest address space 717 * @gaddr: virtual address in the guest address space
759 * @len: size of area 718 * @len: size of area
760 * 719 *
761 * Returns 0 if for each page in the given range a gmap mapping exists and 720 * Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
763 * for one or more pages -EFAULT is returned. If no memory could be allocated 722 * for one or more pages -EFAULT is returned. If no memory could be allocated
764 * -ENOMEM is returned. This function establishes missing page table entries. 723 * -ENOMEM is returned. This function establishes missing page table entries.
765 */ 724 */
766int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) 725int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
767{ 726{
768 unsigned long addr; 727 unsigned long addr;
769 spinlock_t *ptl; 728 spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
771 pgste_t pgste; 730 pgste_t pgste;
772 int rc = 0; 731 int rc = 0;
773 732
774 if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) 733 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
775 return -EINVAL; 734 return -EINVAL;
776 down_read(&gmap->mm->mmap_sem); 735 down_read(&gmap->mm->mmap_sem);
777 while (len) { 736 while (len) {
778 /* Convert gmap address and connect the page tables */ 737 /* Convert gmap address and connect the page tables */
779 addr = __gmap_fault(start, gmap); 738 addr = __gmap_translate(gmap, gaddr);
780 if (IS_ERR_VALUE(addr)) { 739 if (IS_ERR_VALUE(addr)) {
781 rc = addr; 740 rc = addr;
782 break; 741 break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
786 rc = -EFAULT; 745 rc = -EFAULT;
787 break; 746 break;
788 } 747 }
748 rc = __gmap_link(gmap, gaddr, addr);
749 if (rc)
750 break;
789 /* Walk the process page table, lock and get pte pointer */ 751 /* Walk the process page table, lock and get pte pointer */
790 ptep = get_locked_pte(gmap->mm, addr, &ptl); 752 ptep = get_locked_pte(gmap->mm, addr, &ptl);
791 if (unlikely(!ptep)) 753 if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
796 pgste = pgste_get_lock(ptep); 758 pgste = pgste_get_lock(ptep);
797 pgste_val(pgste) |= PGSTE_IN_BIT; 759 pgste_val(pgste) |= PGSTE_IN_BIT;
798 pgste_set_unlock(ptep, pgste); 760 pgste_set_unlock(ptep, pgste);
799 start += PAGE_SIZE; 761 gaddr += PAGE_SIZE;
800 len -= PAGE_SIZE; 762 len -= PAGE_SIZE;
801 } 763 }
802 spin_unlock(ptl); 764 spin_unlock(ptl);
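Because __gmap_translate() no longer establishes anything, gmap_ipte_notify() now has to call __gmap_link() explicitly after fixup_user_fault() before it can set PGSTE_IN_BIT on the page. A hedged usage sketch (sketch_watch_guest_page is an illustrative name; in KVM this interface is used, for example, to be notified when guest prefix pages are unmapped):

static int sketch_watch_guest_page(struct gmap *gmap, unsigned long gaddr)
{
	/* page-aligned guest address, one page long */
	return gmap_ipte_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE);
}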
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
809/** 771/**
810 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. 772 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
811 * @mm: pointer to the process mm_struct 773 * @mm: pointer to the process mm_struct
774 * @addr: virtual address in the process address space
812 * @pte: pointer to the page table entry 775 * @pte: pointer to the page table entry
813 * 776 *
814 * This function is assumed to be called with the page table lock held 777 * This function is assumed to be called with the page table lock held
815 * for the pte to notify. 778 * for the pte to notify.
816 */ 779 */
817void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) 780void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
818{ 781{
819 unsigned long segment_offset; 782 unsigned long offset, gaddr;
783 unsigned long *table;
820 struct gmap_notifier *nb; 784 struct gmap_notifier *nb;
821 struct gmap_pgtable *mp; 785 struct gmap *gmap;
822 struct gmap_rmap *rmap;
823 struct page *page;
824 786
825 segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 787 offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
826 segment_offset = segment_offset * (4096 / sizeof(pte_t)); 788 offset = offset * (4096 / sizeof(pte_t));
827 page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
828 mp = (struct gmap_pgtable *) page->index;
829 spin_lock(&gmap_notifier_lock); 789 spin_lock(&gmap_notifier_lock);
830 list_for_each_entry(rmap, &mp->mapper, list) { 790 list_for_each_entry(gmap, &mm->context.gmap_list, list) {
791 table = radix_tree_lookup(&gmap->host_to_guest,
792 vmaddr >> PMD_SHIFT);
793 if (!table)
794 continue;
795 gaddr = __gmap_segment_gaddr(table) + offset;
831 list_for_each_entry(nb, &gmap_notifier_list, list) 796 list_for_each_entry(nb, &gmap_notifier_list, list)
832 nb->notifier_call(rmap->gmap, 797 nb->notifier_call(gmap, gaddr);
833 rmap->vmaddr + segment_offset);
834 } 798 }
835 spin_unlock(&gmap_notifier_lock); 799 spin_unlock(&gmap_notifier_lock);
836} 800}
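gmap_do_ipte_notify() now walks mm->context.gmap_list and uses each gmap's host_to_guest tree to find the affected segment table entry; __gmap_segment_gaddr(), introduced earlier in the patch, recovers the guest segment address, and the pte's position inside its 256-entry page table supplies the page offset. The offset arithmetic above is equivalent to "pte index times PAGE_SIZE"; a small sketch to make that explicit (sketch_page_offset is an illustrative name):

static unsigned long sketch_page_offset(pte_t *pte)
{
	/* 256 ptes of 8 bytes each per 2 KB page table on s390 */
	unsigned long idx = (((unsigned long) pte) / sizeof(pte_t)) & 0xff;

	return idx * 4096;	/* same value as 'offset' computed above */
}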
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
841 return atomic_read(&page->_mapcount) == 0; 805 return atomic_read(&page->_mapcount) == 0;
842} 806}
843 807
844static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 808static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
845 unsigned long vmaddr)
846{ 809{
847 struct page *page; 810 struct page *page;
848 unsigned long *table; 811 unsigned long *table;
849 struct gmap_pgtable *mp;
850 812
851 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 813 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
852 if (!page) 814 if (!page)
853 return NULL; 815 return NULL;
854 mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
855 if (!mp) {
856 __free_page(page);
857 return NULL;
858 }
859 if (!pgtable_page_ctor(page)) { 816 if (!pgtable_page_ctor(page)) {
860 kfree(mp);
861 __free_page(page); 817 __free_page(page);
862 return NULL; 818 return NULL;
863 } 819 }
864 mp->vmaddr = vmaddr & PMD_MASK;
865 INIT_LIST_HEAD(&mp->mapper);
866 page->index = (unsigned long) mp;
867 atomic_set(&page->_mapcount, 0); 820 atomic_set(&page->_mapcount, 0);
868 table = (unsigned long *) page_to_phys(page); 821 table = (unsigned long *) page_to_phys(page);
869 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 822 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
874static inline void page_table_free_pgste(unsigned long *table) 827static inline void page_table_free_pgste(unsigned long *table)
875{ 828{
876 struct page *page; 829 struct page *page;
877 struct gmap_pgtable *mp;
878 830
879 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 831 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
880 mp = (struct gmap_pgtable *) page->index;
881 BUG_ON(!list_empty(&mp->mapper));
882 pgtable_page_dtor(page); 832 pgtable_page_dtor(page);
883 atomic_set(&page->_mapcount, -1); 833 atomic_set(&page->_mapcount, -1);
884 kfree(mp);
885 __free_page(page); 834 __free_page(page);
886} 835}
887 836
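With the rmap lists gone, page tables with PGSTEs no longer need a struct gmap_pgtable hung off page->index, so page_table_alloc_pgste() and page_table_free_pgste() shrink to plain page allocation plus ctor/dtor, and the vmaddr argument disappears from the allocation path altogether. A hedged caller sketch of the new allocation call (sketch_alloc_pte_table is an illustrative name):

static unsigned long *sketch_alloc_pte_table(struct mm_struct *mm)
{
	return page_table_alloc(mm);	/* was: page_table_alloc(mm, vmaddr) */
}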
@@ -1038,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
1038 return 0; 987 return 0;
1039} 988}
1040 989
1041static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 990static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
1042 unsigned long vmaddr)
1043{ 991{
1044 return NULL; 992 return NULL;
1045} 993}
@@ -1053,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
1053{ 1001{
1054} 1002}
1055 1003
1056static inline void gmap_disconnect_pgtable(struct mm_struct *mm, 1004static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
1057 unsigned long *table) 1005 unsigned long vmaddr)
1058{ 1006{
1059} 1007}
1060 1008
@@ -1074,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
1074/* 1022/*
1075 * page table entry allocation/free routines. 1023 * page table entry allocation/free routines.
1076 */ 1024 */
1077unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) 1025unsigned long *page_table_alloc(struct mm_struct *mm)
1078{ 1026{
1079 unsigned long *uninitialized_var(table); 1027 unsigned long *uninitialized_var(table);
1080 struct page *uninitialized_var(page); 1028 struct page *uninitialized_var(page);
1081 unsigned int mask, bit; 1029 unsigned int mask, bit;
1082 1030
1083 if (mm_has_pgste(mm)) 1031 if (mm_has_pgste(mm))
1084 return page_table_alloc_pgste(mm, vmaddr); 1032 return page_table_alloc_pgste(mm);
1085 /* Allocate fragments of a 4K page as 1K/2K page table */ 1033 /* Allocate fragments of a 4K page as 1K/2K page table */
1086 spin_lock_bh(&mm->context.list_lock); 1034 spin_lock_bh(&mm->context.list_lock);
1087 mask = FRAG_MASK; 1035 mask = FRAG_MASK;
@@ -1123,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
1123 unsigned int bit, mask; 1071 unsigned int bit, mask;
1124 1072
1125 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1073 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1126 if (page_table_with_pgste(page)) { 1074 if (page_table_with_pgste(page))
1127 gmap_disconnect_pgtable(mm, table);
1128 return page_table_free_pgste(table); 1075 return page_table_free_pgste(table);
1129 }
1130 /* Free 1K/2K page table fragment of a 4K page */ 1076 /* Free 1K/2K page table fragment of a 4K page */
1131 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 1077 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
1132 spin_lock_bh(&mm->context.list_lock); 1078 spin_lock_bh(&mm->context.list_lock);
@@ -1158,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
1158 } 1104 }
1159} 1105}
1160 1106
1161void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) 1107void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
1108 unsigned long vmaddr)
1162{ 1109{
1163 struct mm_struct *mm; 1110 struct mm_struct *mm;
1164 struct page *page; 1111 struct page *page;
@@ -1167,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
1167 mm = tlb->mm; 1114 mm = tlb->mm;
1168 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1115 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1169 if (page_table_with_pgste(page)) { 1116 if (page_table_with_pgste(page)) {
1170 gmap_disconnect_pgtable(mm, table); 1117 gmap_unlink(mm, table, vmaddr);
1171 table = (unsigned long *) (__pa(table) | FRAG_MASK); 1118 table = (unsigned long *) (__pa(table) | FRAG_MASK);
1172 tlb_remove_table(tlb, table); 1119 tlb_remove_table(tlb, table);
1173 return; 1120 return;
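The host address moves from the allocation side to the RCU free side: page_table_free_rcu() now takes vmaddr so that gmap_unlink() can drop the host_to_guest entries of every gmap attached to the mm before the table is handed to the mmu_gather batch. A hedged caller sketch (sketch_free_pte_table is an illustrative name; in the kernel this happens on the pte_free_tlb path):

static void sketch_free_pte_table(struct mmu_gather *tlb, unsigned long *table,
				  unsigned long vmaddr)
{
	page_table_free_rcu(tlb, table, vmaddr);
}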
@@ -1303,7 +1250,7 @@ again:
1303 if (page_table_with_pgste(page)) 1250 if (page_table_with_pgste(page))
1304 continue; 1251 continue;
1305 /* Allocate new page table with pgstes */ 1252 /* Allocate new page table with pgstes */
1306 new = page_table_alloc_pgste(mm, addr); 1253 new = page_table_alloc_pgste(mm);
1307 if (!new) 1254 if (!new)
1308 return -ENOMEM; 1255 return -ENOMEM;
1309 1256
@@ -1318,7 +1265,7 @@ again:
1318 /* Establish new table */ 1265 /* Establish new table */
1319 pmd_populate(mm, pmd, (pte_t *) new); 1266 pmd_populate(mm, pmd, (pte_t *) new);
1320 /* Free old table with rcu, there might be a walker! */ 1267 /* Free old table with rcu, there might be a walker! */
1321 page_table_free_rcu(tlb, table); 1268 page_table_free_rcu(tlb, table, addr);
1322 new = NULL; 1269 new = NULL;
1323 } 1270 }
1324 spin_unlock(ptl); 1271 spin_unlock(ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fe9012a49aa5..fdbd7888cb07 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
65 pte_t *pte; 65 pte_t *pte;
66 66
67 if (slab_is_available()) 67 if (slab_is_available())
68 pte = (pte_t *) page_table_alloc(&init_mm, address); 68 pte = (pte_t *) page_table_alloc(&init_mm);
69 else 69 else
70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), 70 pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
71 PTRS_PER_PTE * sizeof(pte_t)); 71 PTRS_PER_PTE * sizeof(pte_t));