author     Linus Torvalds <torvalds@linux-foundation.org>   2016-08-02 16:11:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-08-02 16:11:27 -0400
commit     221bb8a46e230b9824204ae86537183d9991ff2a (patch)
tree       92510d72285b2285be7cb87288bf088cb28af4c1 /arch/s390/mm
parent     f7b32e4c021fd788f13f6785e17efbc3eb05b351 (diff)
parent     23528bb21ee2c9b27f3feddd77a2a3351a8df148 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 - ARM: GICv3 ITS emulation and various fixes. Removal of the
   old VGIC implementation.

 - s390: support for trapping software breakpoints, nested
   virtualization (vSIE), the STHYI opcode, initial extensions
   for CPU model support.

 - MIPS: support for MIPS64 hosts (32-bit guests only) and lots
   of cleanups, preliminary to this and the upcoming support for
   hardware virtualization extensions.

 - x86: support for execute-only mappings in nested EPT; reduced
   vmexit latency for TSC deadline timer (by about 30%) on Intel
   hosts; support for more than 255 vCPUs.

 - PPC: bugfixes.
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
KVM: PPC: Introduce KVM_CAP_PPC_HTM
MIPS: Select HAVE_KVM for MIPS64_R{2,6}
MIPS: KVM: Reset CP0_PageMask during host TLB flush
MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
MIPS: KVM: Sign extend MFC0/RDHWR results
MIPS: KVM: Fix 64-bit big endian dynamic translation
MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
MIPS: KVM: Use 64-bit CP0_EBase when appropriate
MIPS: KVM: Set CP0_Status.KX on MIPS64
MIPS: KVM: Make entry code MIPS64 friendly
MIPS: KVM: Use kmap instead of CKSEG0ADDR()
MIPS: KVM: Use virt_to_phys() to get commpage PFN
MIPS: Fix definition of KSEGX() for 64-bit
KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
kvm: x86: nVMX: maintain internal copy of current VMCS
KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
KVM: arm64: vgic-its: Simplify MAPI error handling
KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
...
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/fault.c   |    2
-rw-r--r--  arch/s390/mm/gmap.c    | 1574
-rw-r--r--  arch/s390/mm/pgalloc.c |   39
-rw-r--r--  arch/s390/mm/pgtable.c |  209
4 files changed, 1707 insertions(+), 117 deletions(-)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 25783dc3c813..a58bca62a93b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -418,6 +418,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
418 | (struct gmap *) S390_lowcore.gmap : NULL; | 418 | (struct gmap *) S390_lowcore.gmap : NULL; |
419 | if (gmap) { | 419 | if (gmap) { |
420 | current->thread.gmap_addr = address; | 420 | current->thread.gmap_addr = address; |
421 | current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); | ||
422 | current->thread.gmap_int_code = regs->int_code & 0xffff; | ||
421 | address = __gmap_translate(gmap, address); | 423 | address = __gmap_translate(gmap, address); |
422 | if (address == -EFAULT) { | 424 | if (address == -EFAULT) { |
423 | fault = VM_FAULT_BADMAP; | 425 | fault = VM_FAULT_BADMAP; |
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 063c721ec0dc..2ce6bb3bab32 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -20,14 +20,16 @@
20 | #include <asm/gmap.h> | 20 | #include <asm/gmap.h> |
21 | #include <asm/tlb.h> | 21 | #include <asm/tlb.h> |
22 | 22 | ||
23 | #define GMAP_SHADOW_FAKE_TABLE 1ULL | ||
24 | |||
23 | /** | 25 | /** |
24 | * gmap_alloc - allocate a guest address space | 26 | * gmap_alloc - allocate and initialize a guest address space |
25 | * @mm: pointer to the parent mm_struct | 27 | * @mm: pointer to the parent mm_struct |
26 | * @limit: maximum address of the gmap address space | 28 | * @limit: maximum address of the gmap address space |
27 | * | 29 | * |
28 | * Returns a guest address space structure. | 30 | * Returns a guest address space structure. |
29 | */ | 31 | */ |
30 | struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) | 32 | static struct gmap *gmap_alloc(unsigned long limit) |
31 | { | 33 | { |
32 | struct gmap *gmap; | 34 | struct gmap *gmap; |
33 | struct page *page; | 35 | struct page *page; |
@@ -55,10 +57,14 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
55 | if (!gmap) | 57 | if (!gmap) |
56 | goto out; | 58 | goto out; |
57 | INIT_LIST_HEAD(&gmap->crst_list); | 59 | INIT_LIST_HEAD(&gmap->crst_list); |
60 | INIT_LIST_HEAD(&gmap->children); | ||
61 | INIT_LIST_HEAD(&gmap->pt_list); | ||
58 | INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); | 62 | INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); |
59 | INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); | 63 | INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); |
64 | INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC); | ||
60 | spin_lock_init(&gmap->guest_table_lock); | 65 | spin_lock_init(&gmap->guest_table_lock); |
61 | gmap->mm = mm; | 66 | spin_lock_init(&gmap->shadow_lock); |
67 | atomic_set(&gmap->ref_count, 1); | ||
62 | page = alloc_pages(GFP_KERNEL, 2); | 68 | page = alloc_pages(GFP_KERNEL, 2); |
63 | if (!page) | 69 | if (!page) |
64 | goto out_free; | 70 | goto out_free; |
@@ -70,9 +76,6 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
70 | gmap->asce = atype | _ASCE_TABLE_LENGTH | | 76 | gmap->asce = atype | _ASCE_TABLE_LENGTH | |
71 | _ASCE_USER_BITS | __pa(table); | 77 | _ASCE_USER_BITS | __pa(table); |
72 | gmap->asce_end = limit; | 78 | gmap->asce_end = limit; |
73 | down_write(&mm->mmap_sem); | ||
74 | list_add(&gmap->list, &mm->context.gmap_list); | ||
75 | up_write(&mm->mmap_sem); | ||
76 | return gmap; | 79 | return gmap; |
77 | 80 | ||
78 | out_free: | 81 | out_free: |
@@ -80,7 +83,28 @@ out_free:
80 | out: | 83 | out: |
81 | return NULL; | 84 | return NULL; |
82 | } | 85 | } |
83 | EXPORT_SYMBOL_GPL(gmap_alloc); | 86 | |
87 | /** | ||
88 | * gmap_create - create a guest address space | ||
89 | * @mm: pointer to the parent mm_struct | ||
90 | * @limit: maximum size of the gmap address space | ||
91 | * | ||
92 | * Returns a guest address space structure. | ||
93 | */ | ||
94 | struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) | ||
95 | { | ||
96 | struct gmap *gmap; | ||
97 | |||
98 | gmap = gmap_alloc(limit); | ||
99 | if (!gmap) | ||
100 | return NULL; | ||
101 | gmap->mm = mm; | ||
102 | spin_lock(&mm->context.gmap_lock); | ||
103 | list_add_rcu(&gmap->list, &mm->context.gmap_list); | ||
104 | spin_unlock(&mm->context.gmap_lock); | ||
105 | return gmap; | ||
106 | } | ||
107 | EXPORT_SYMBOL_GPL(gmap_create); | ||
84 | 108 | ||
85 | static void gmap_flush_tlb(struct gmap *gmap) | 109 | static void gmap_flush_tlb(struct gmap *gmap) |
86 | { | 110 | { |
@@ -114,31 +138,117 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
114 | } while (nr > 0); | 138 | } while (nr > 0); |
115 | } | 139 | } |
116 | 140 | ||
141 | static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) | ||
142 | { | ||
143 | struct gmap_rmap *rmap, *rnext, *head; | ||
144 | struct radix_tree_iter iter; | ||
145 | unsigned long indices[16]; | ||
146 | unsigned long index; | ||
147 | void **slot; | ||
148 | int i, nr; | ||
149 | |||
150 | /* A radix tree is freed by deleting all of its entries */ | ||
151 | index = 0; | ||
152 | do { | ||
153 | nr = 0; | ||
154 | radix_tree_for_each_slot(slot, root, &iter, index) { | ||
155 | indices[nr] = iter.index; | ||
156 | if (++nr == 16) | ||
157 | break; | ||
158 | } | ||
159 | for (i = 0; i < nr; i++) { | ||
160 | index = indices[i]; | ||
161 | head = radix_tree_delete(root, index); | ||
162 | gmap_for_each_rmap_safe(rmap, rnext, head) | ||
163 | kfree(rmap); | ||
164 | } | ||
165 | } while (nr > 0); | ||
166 | } | ||
167 | |||
117 | /** | 168 | /** |
118 | * gmap_free - free a guest address space | 169 | * gmap_free - free a guest address space |
119 | * @gmap: pointer to the guest address space structure | 170 | * @gmap: pointer to the guest address space structure |
171 | * | ||
172 | * No locks required. There are no references to this gmap anymore. | ||
120 | */ | 173 | */ |
121 | void gmap_free(struct gmap *gmap) | 174 | static void gmap_free(struct gmap *gmap) |
122 | { | 175 | { |
123 | struct page *page, *next; | 176 | struct page *page, *next; |
124 | 177 | ||
125 | /* Flush tlb. */ | 178 | /* Flush tlb of all gmaps (if not already done for shadows) */ |
126 | if (MACHINE_HAS_IDTE) | 179 | if (!(gmap_is_shadow(gmap) && gmap->removed)) |
127 | __tlb_flush_idte(gmap->asce); | 180 | gmap_flush_tlb(gmap); |
128 | else | ||
129 | __tlb_flush_global(); | ||
130 | |||
131 | /* Free all segment & region tables. */ | 181 | /* Free all segment & region tables. */ |
132 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) | 182 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) |
133 | __free_pages(page, 2); | 183 | __free_pages(page, 2); |
134 | gmap_radix_tree_free(&gmap->guest_to_host); | 184 | gmap_radix_tree_free(&gmap->guest_to_host); |
135 | gmap_radix_tree_free(&gmap->host_to_guest); | 185 | gmap_radix_tree_free(&gmap->host_to_guest); |
136 | down_write(&gmap->mm->mmap_sem); | 186 | |
137 | list_del(&gmap->list); | 187 | /* Free additional data for a shadow gmap */ |
138 | up_write(&gmap->mm->mmap_sem); | 188 | if (gmap_is_shadow(gmap)) { |
189 | /* Free all page tables. */ | ||
190 | list_for_each_entry_safe(page, next, &gmap->pt_list, lru) | ||
191 | page_table_free_pgste(page); | ||
192 | gmap_rmap_radix_tree_free(&gmap->host_to_rmap); | ||
193 | /* Release reference to the parent */ | ||
194 | gmap_put(gmap->parent); | ||
195 | } | ||
196 | |||
139 | kfree(gmap); | 197 | kfree(gmap); |
140 | } | 198 | } |
141 | EXPORT_SYMBOL_GPL(gmap_free); | 199 | |
200 | /** | ||
201 | * gmap_get - increase reference counter for guest address space | ||
202 | * @gmap: pointer to the guest address space structure | ||
203 | * | ||
204 | * Returns the gmap pointer | ||
205 | */ | ||
206 | struct gmap *gmap_get(struct gmap *gmap) | ||
207 | { | ||
208 | atomic_inc(&gmap->ref_count); | ||
209 | return gmap; | ||
210 | } | ||
211 | EXPORT_SYMBOL_GPL(gmap_get); | ||
212 | |||
213 | /** | ||
214 | * gmap_put - decrease reference counter for guest address space | ||
215 | * @gmap: pointer to the guest address space structure | ||
216 | * | ||
217 | * If the reference counter reaches zero the guest address space is freed. | ||
218 | */ | ||
219 | void gmap_put(struct gmap *gmap) | ||
220 | { | ||
221 | if (atomic_dec_return(&gmap->ref_count) == 0) | ||
222 | gmap_free(gmap); | ||
223 | } | ||
224 | EXPORT_SYMBOL_GPL(gmap_put); | ||
225 | |||
226 | /** | ||
227 | * gmap_remove - remove a guest address space but do not free it yet | ||
228 | * @gmap: pointer to the guest address space structure | ||
229 | */ | ||
230 | void gmap_remove(struct gmap *gmap) | ||
231 | { | ||
232 | struct gmap *sg, *next; | ||
233 | |||
234 | /* Remove all shadow gmaps linked to this gmap */ | ||
235 | if (!list_empty(&gmap->children)) { | ||
236 | spin_lock(&gmap->shadow_lock); | ||
237 | list_for_each_entry_safe(sg, next, &gmap->children, list) { | ||
238 | list_del(&sg->list); | ||
239 | gmap_put(sg); | ||
240 | } | ||
241 | spin_unlock(&gmap->shadow_lock); | ||
242 | } | ||
243 | /* Remove gmap from the pre-mm list */ | ||
244 | spin_lock(&gmap->mm->context.gmap_lock); | ||
245 | list_del_rcu(&gmap->list); | ||
246 | spin_unlock(&gmap->mm->context.gmap_lock); | ||
247 | synchronize_rcu(); | ||
248 | /* Put reference */ | ||
249 | gmap_put(gmap); | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(gmap_remove); | ||
142 | 252 | ||
143 | /** | 253 | /** |
144 | * gmap_enable - switch primary space to the guest address space | 254 | * gmap_enable - switch primary space to the guest address space |
@@ -160,6 +270,17 @@ void gmap_disable(struct gmap *gmap)
160 | } | 270 | } |
161 | EXPORT_SYMBOL_GPL(gmap_disable); | 271 | EXPORT_SYMBOL_GPL(gmap_disable); |
162 | 272 | ||
273 | /** | ||
274 | * gmap_get_enabled - get a pointer to the currently enabled gmap | ||
275 | * | ||
276 | * Returns a pointer to the currently enabled gmap. 0 if none is enabled. | ||
277 | */ | ||
278 | struct gmap *gmap_get_enabled(void) | ||
279 | { | ||
280 | return (struct gmap *) S390_lowcore.gmap; | ||
281 | } | ||
282 | EXPORT_SYMBOL_GPL(gmap_get_enabled); | ||
283 | |||
163 | /* | 284 | /* |
164 | * gmap_alloc_table is assumed to be called with mmap_sem held | 285 | * gmap_alloc_table is assumed to be called with mmap_sem held |
165 | */ | 286 | */ |
@@ -175,7 +296,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
175 | return -ENOMEM; | 296 | return -ENOMEM; |
176 | new = (unsigned long *) page_to_phys(page); | 297 | new = (unsigned long *) page_to_phys(page); |
177 | crst_table_init(new, init); | 298 | crst_table_init(new, init); |
178 | spin_lock(&gmap->mm->page_table_lock); | 299 | spin_lock(&gmap->guest_table_lock); |
179 | if (*table & _REGION_ENTRY_INVALID) { | 300 | if (*table & _REGION_ENTRY_INVALID) { |
180 | list_add(&page->lru, &gmap->crst_list); | 301 | list_add(&page->lru, &gmap->crst_list); |
181 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | | 302 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | |
@@ -183,7 +304,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
183 | page->index = gaddr; | 304 | page->index = gaddr; |
184 | page = NULL; | 305 | page = NULL; |
185 | } | 306 | } |
186 | spin_unlock(&gmap->mm->page_table_lock); | 307 | spin_unlock(&gmap->guest_table_lock); |
187 | if (page) | 308 | if (page) |
188 | __free_pages(page, 2); | 309 | __free_pages(page, 2); |
189 | return 0; | 310 | return 0; |
@@ -219,6 +340,7 @@ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
219 | unsigned long *entry; | 340 | unsigned long *entry; |
220 | int flush = 0; | 341 | int flush = 0; |
221 | 342 | ||
343 | BUG_ON(gmap_is_shadow(gmap)); | ||
222 | spin_lock(&gmap->guest_table_lock); | 344 | spin_lock(&gmap->guest_table_lock); |
223 | entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); | 345 | entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); |
224 | if (entry) { | 346 | if (entry) { |
@@ -258,6 +380,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
258 | unsigned long off; | 380 | unsigned long off; |
259 | int flush; | 381 | int flush; |
260 | 382 | ||
383 | BUG_ON(gmap_is_shadow(gmap)); | ||
261 | if ((to | len) & (PMD_SIZE - 1)) | 384 | if ((to | len) & (PMD_SIZE - 1)) |
262 | return -EINVAL; | 385 | return -EINVAL; |
263 | if (len == 0 || to + len < to) | 386 | if (len == 0 || to + len < to) |
@@ -289,6 +412,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
289 | unsigned long off; | 412 | unsigned long off; |
290 | int flush; | 413 | int flush; |
291 | 414 | ||
415 | BUG_ON(gmap_is_shadow(gmap)); | ||
292 | if ((from | to | len) & (PMD_SIZE - 1)) | 416 | if ((from | to | len) & (PMD_SIZE - 1)) |
293 | return -EINVAL; | 417 | return -EINVAL; |
294 | if (len == 0 || from + len < from || to + len < to || | 418 | if (len == 0 || from + len < from || to + len < to || |
@@ -326,6 +450,8 @@ EXPORT_SYMBOL_GPL(gmap_map_segment);
326 | * This function does not establish potentially missing page table entries. | 450 | * This function does not establish potentially missing page table entries. |
327 | * The mmap_sem of the mm that belongs to the address space must be held | 451 | * The mmap_sem of the mm that belongs to the address space must be held |
328 | * when this function gets called. | 452 | * when this function gets called. |
453 | * | ||
454 | * Note: Can also be called for shadow gmaps. | ||
329 | */ | 455 | */ |
330 | unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) | 456 | unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) |
331 | { | 457 | { |
@@ -333,6 +459,7 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
333 | 459 | ||
334 | vmaddr = (unsigned long) | 460 | vmaddr = (unsigned long) |
335 | radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); | 461 | radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); |
462 | /* Note: guest_to_host is empty for a shadow gmap */ | ||
336 | return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; | 463 | return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; |
337 | } | 464 | } |
338 | EXPORT_SYMBOL_GPL(__gmap_translate); | 465 | EXPORT_SYMBOL_GPL(__gmap_translate); |
@@ -369,11 +496,13 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
369 | struct gmap *gmap; | 496 | struct gmap *gmap; |
370 | int flush; | 497 | int flush; |
371 | 498 | ||
372 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { | 499 | rcu_read_lock(); |
500 | list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { | ||
373 | flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); | 501 | flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); |
374 | if (flush) | 502 | if (flush) |
375 | gmap_flush_tlb(gmap); | 503 | gmap_flush_tlb(gmap); |
376 | } | 504 | } |
505 | rcu_read_unlock(); | ||
377 | } | 506 | } |
378 | 507 | ||
379 | /** | 508 | /** |
@@ -397,6 +526,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
397 | pmd_t *pmd; | 526 | pmd_t *pmd; |
398 | int rc; | 527 | int rc; |
399 | 528 | ||
529 | BUG_ON(gmap_is_shadow(gmap)); | ||
400 | /* Create higher level tables in the gmap page table */ | 530 | /* Create higher level tables in the gmap page table */ |
401 | table = gmap->table; | 531 | table = gmap->table; |
402 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { | 532 | if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { |
@@ -552,116 +682,1412 @@ static LIST_HEAD(gmap_notifier_list);
552 | static DEFINE_SPINLOCK(gmap_notifier_lock); | 682 | static DEFINE_SPINLOCK(gmap_notifier_lock); |
553 | 683 | ||
554 | /** | 684 | /** |
555 | * gmap_register_ipte_notifier - register a pte invalidation callback | 685 | * gmap_register_pte_notifier - register a pte invalidation callback |
556 | * @nb: pointer to the gmap notifier block | 686 | * @nb: pointer to the gmap notifier block |
557 | */ | 687 | */ |
558 | void gmap_register_ipte_notifier(struct gmap_notifier *nb) | 688 | void gmap_register_pte_notifier(struct gmap_notifier *nb) |
559 | { | 689 | { |
560 | spin_lock(&gmap_notifier_lock); | 690 | spin_lock(&gmap_notifier_lock); |
561 | list_add(&nb->list, &gmap_notifier_list); | 691 | list_add_rcu(&nb->list, &gmap_notifier_list); |
562 | spin_unlock(&gmap_notifier_lock); | 692 | spin_unlock(&gmap_notifier_lock); |
563 | } | 693 | } |
564 | EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); | 694 | EXPORT_SYMBOL_GPL(gmap_register_pte_notifier); |
565 | 695 | ||
566 | /** | 696 | /** |
567 | * gmap_unregister_ipte_notifier - remove a pte invalidation callback | 697 | * gmap_unregister_pte_notifier - remove a pte invalidation callback |
568 | * @nb: pointer to the gmap notifier block | 698 | * @nb: pointer to the gmap notifier block |
569 | */ | 699 | */ |
570 | void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) | 700 | void gmap_unregister_pte_notifier(struct gmap_notifier *nb) |
571 | { | 701 | { |
572 | spin_lock(&gmap_notifier_lock); | 702 | spin_lock(&gmap_notifier_lock); |
573 | list_del_init(&nb->list); | 703 | list_del_rcu(&nb->list); |
574 | spin_unlock(&gmap_notifier_lock); | 704 | spin_unlock(&gmap_notifier_lock); |
705 | synchronize_rcu(); | ||
706 | } | ||
707 | EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier); | ||
708 | |||
709 | /** | ||
710 | * gmap_call_notifier - call all registered invalidation callbacks | ||
711 | * @gmap: pointer to guest mapping meta data structure | ||
712 | * @start: start virtual address in the guest address space | ||
713 | * @end: end virtual address in the guest address space | ||
714 | */ | ||
715 | static void gmap_call_notifier(struct gmap *gmap, unsigned long start, | ||
716 | unsigned long end) | ||
717 | { | ||
718 | struct gmap_notifier *nb; | ||
719 | |||
720 | list_for_each_entry(nb, &gmap_notifier_list, list) | ||
721 | nb->notifier_call(gmap, start, end); | ||
722 | } | ||
723 | |||
724 | /** | ||
725 | * gmap_table_walk - walk the gmap page tables | ||
726 | * @gmap: pointer to guest mapping meta data structure | ||
727 | * @gaddr: virtual address in the guest address space | ||
728 | * @level: page table level to stop at | ||
729 | * | ||
730 | * Returns a table entry pointer for the given guest address and @level | ||
731 | * @level=0 : returns a pointer to a page table table entry (or NULL) | ||
732 | * @level=1 : returns a pointer to a segment table entry (or NULL) | ||
733 | * @level=2 : returns a pointer to a region-3 table entry (or NULL) | ||
734 | * @level=3 : returns a pointer to a region-2 table entry (or NULL) | ||
735 | * @level=4 : returns a pointer to a region-1 table entry (or NULL) | ||
736 | * | ||
737 | * Returns NULL if the gmap page tables could not be walked to the | ||
738 | * requested level. | ||
739 | * | ||
740 | * Note: Can also be called for shadow gmaps. | ||
741 | */ | ||
742 | static inline unsigned long *gmap_table_walk(struct gmap *gmap, | ||
743 | unsigned long gaddr, int level) | ||
744 | { | ||
745 | unsigned long *table; | ||
746 | |||
747 | if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) | ||
748 | return NULL; | ||
749 | if (gmap_is_shadow(gmap) && gmap->removed) | ||
750 | return NULL; | ||
751 | if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) | ||
752 | return NULL; | ||
753 | table = gmap->table; | ||
754 | switch (gmap->asce & _ASCE_TYPE_MASK) { | ||
755 | case _ASCE_TYPE_REGION1: | ||
756 | table += (gaddr >> 53) & 0x7ff; | ||
757 | if (level == 4) | ||
758 | break; | ||
759 | if (*table & _REGION_ENTRY_INVALID) | ||
760 | return NULL; | ||
761 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
762 | /* Fallthrough */ | ||
763 | case _ASCE_TYPE_REGION2: | ||
764 | table += (gaddr >> 42) & 0x7ff; | ||
765 | if (level == 3) | ||
766 | break; | ||
767 | if (*table & _REGION_ENTRY_INVALID) | ||
768 | return NULL; | ||
769 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
770 | /* Fallthrough */ | ||
771 | case _ASCE_TYPE_REGION3: | ||
772 | table += (gaddr >> 31) & 0x7ff; | ||
773 | if (level == 2) | ||
774 | break; | ||
775 | if (*table & _REGION_ENTRY_INVALID) | ||
776 | return NULL; | ||
777 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | ||
778 | /* Fallthrough */ | ||
779 | case _ASCE_TYPE_SEGMENT: | ||
780 | table += (gaddr >> 20) & 0x7ff; | ||
781 | if (level == 1) | ||
782 | break; | ||
783 | if (*table & _REGION_ENTRY_INVALID) | ||
784 | return NULL; | ||
785 | table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); | ||
786 | table += (gaddr >> 12) & 0xff; | ||
787 | } | ||
788 | return table; | ||
789 | } | ||
790 | |||
791 | /** | ||
792 | * gmap_pte_op_walk - walk the gmap page table, get the page table lock | ||
793 | * and return the pte pointer | ||
794 | * @gmap: pointer to guest mapping meta data structure | ||
795 | * @gaddr: virtual address in the guest address space | ||
796 | * @ptl: pointer to the spinlock pointer | ||
797 | * | ||
798 | * Returns a pointer to the locked pte for a guest address, or NULL | ||
799 | * | ||
800 | * Note: Can also be called for shadow gmaps. | ||
801 | */ | ||
802 | static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr, | ||
803 | spinlock_t **ptl) | ||
804 | { | ||
805 | unsigned long *table; | ||
806 | |||
807 | if (gmap_is_shadow(gmap)) | ||
808 | spin_lock(&gmap->guest_table_lock); | ||
809 | /* Walk the gmap page table, lock and get pte pointer */ | ||
810 | table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */ | ||
811 | if (!table || *table & _SEGMENT_ENTRY_INVALID) { | ||
812 | if (gmap_is_shadow(gmap)) | ||
813 | spin_unlock(&gmap->guest_table_lock); | ||
814 | return NULL; | ||
815 | } | ||
816 | if (gmap_is_shadow(gmap)) { | ||
817 | *ptl = &gmap->guest_table_lock; | ||
818 | return pte_offset_map((pmd_t *) table, gaddr); | ||
819 | } | ||
820 | return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl); | ||
821 | } | ||
822 | |||
823 | /** | ||
824 | * gmap_pte_op_fixup - force a page in and connect the gmap page table | ||
825 | * @gmap: pointer to guest mapping meta data structure | ||
826 | * @gaddr: virtual address in the guest address space | ||
827 | * @vmaddr: address in the host process address space | ||
828 | * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE | ||
829 | * | ||
830 | * Returns 0 if the caller can retry __gmap_translate (might fail again), | ||
831 | * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing | ||
832 | * up or connecting the gmap page table. | ||
833 | */ | ||
834 | static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, | ||
835 | unsigned long vmaddr, int prot) | ||
836 | { | ||
837 | struct mm_struct *mm = gmap->mm; | ||
838 | unsigned int fault_flags; | ||
839 | bool unlocked = false; | ||
840 | |||
841 | BUG_ON(gmap_is_shadow(gmap)); | ||
842 | fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; | ||
843 | if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) | ||
844 | return -EFAULT; | ||
845 | if (unlocked) | ||
846 | /* lost mmap_sem, caller has to retry __gmap_translate */ | ||
847 | return 0; | ||
848 | /* Connect the page tables */ | ||
849 | return __gmap_link(gmap, gaddr, vmaddr); | ||
575 | } | 850 | } |
576 | EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); | ||
577 | 851 | ||
578 | /** | 852 | /** |
579 | * gmap_ipte_notify - mark a range of ptes for invalidation notification | 853 | * gmap_pte_op_end - release the page table lock |
854 | * @ptl: pointer to the spinlock pointer | ||
855 | */ | ||
856 | static void gmap_pte_op_end(spinlock_t *ptl) | ||
857 | { | ||
858 | spin_unlock(ptl); | ||
859 | } | ||
860 | |||
861 | /* | ||
862 | * gmap_protect_range - remove access rights to memory and set pgste bits | ||
580 | * @gmap: pointer to guest mapping meta data structure | 863 | * @gmap: pointer to guest mapping meta data structure |
581 | * @gaddr: virtual address in the guest address space | 864 | * @gaddr: virtual address in the guest address space |
582 | * @len: size of area | 865 | * @len: size of area |
866 | * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE | ||
867 | * @bits: pgste notification bits to set | ||
583 | * | 868 | * |
584 | * Returns 0 if for each page in the given range a gmap mapping exists and | 869 | * Returns 0 if successfully protected, -ENOMEM if out of memory and |
585 | * the invalidation notification could be set. If the gmap mapping is missing | 870 | * -EFAULT if gaddr is invalid (or mapping for shadows is missing). |
586 | * for one or more pages -EFAULT is returned. If no memory could be allocated | 871 | * |
587 | * -ENOMEM is returned. This function establishes missing page table entries. | 872 | * Called with sg->mm->mmap_sem in read. |
873 | * | ||
874 | * Note: Can also be called for shadow gmaps. | ||
588 | */ | 875 | */ |
589 | int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) | 876 | static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, |
877 | unsigned long len, int prot, unsigned long bits) | ||
590 | { | 878 | { |
591 | unsigned long addr; | 879 | unsigned long vmaddr; |
592 | spinlock_t *ptl; | 880 | spinlock_t *ptl; |
593 | pte_t *ptep; | 881 | pte_t *ptep; |
594 | bool unlocked; | 882 | int rc; |
595 | int rc = 0; | 883 | |
884 | while (len) { | ||
885 | rc = -EAGAIN; | ||
886 | ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); | ||
887 | if (ptep) { | ||
888 | rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); | ||
889 | gmap_pte_op_end(ptl); | ||
890 | } | ||
891 | if (rc) { | ||
892 | vmaddr = __gmap_translate(gmap, gaddr); | ||
893 | if (IS_ERR_VALUE(vmaddr)) | ||
894 | return vmaddr; | ||
895 | rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); | ||
896 | if (rc) | ||
897 | return rc; | ||
898 | continue; | ||
899 | } | ||
900 | gaddr += PAGE_SIZE; | ||
901 | len -= PAGE_SIZE; | ||
902 | } | ||
903 | return 0; | ||
904 | } | ||
905 | |||
906 | /** | ||
907 | * gmap_mprotect_notify - change access rights for a range of ptes and | ||
908 | * call the notifier if any pte changes again | ||
909 | * @gmap: pointer to guest mapping meta data structure | ||
910 | * @gaddr: virtual address in the guest address space | ||
911 | * @len: size of area | ||
912 | * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE | ||
913 | * | ||
914 | * Returns 0 if for each page in the given range a gmap mapping exists, | ||
915 | * the new access rights could be set and the notifier could be armed. | ||
916 | * If the gmap mapping is missing for one or more pages -EFAULT is | ||
917 | * returned. If no memory could be allocated -ENOMEM is returned. | ||
918 | * This function establishes missing page table entries. | ||
919 | */ | ||
920 | int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, | ||
921 | unsigned long len, int prot) | ||
922 | { | ||
923 | int rc; | ||
596 | 924 | ||
597 | if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) | 925 | if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) |
926 | return -EINVAL; | ||
927 | if (!MACHINE_HAS_ESOP && prot == PROT_READ) | ||
598 | return -EINVAL; | 928 | return -EINVAL; |
599 | down_read(&gmap->mm->mmap_sem); | 929 | down_read(&gmap->mm->mmap_sem); |
600 | while (len) { | 930 | rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); |
601 | unlocked = false; | 931 | up_read(&gmap->mm->mmap_sem); |
602 | /* Convert gmap address and connect the page tables */ | 932 | return rc; |
603 | addr = __gmap_translate(gmap, gaddr); | 933 | } |
604 | if (IS_ERR_VALUE(addr)) { | 934 | EXPORT_SYMBOL_GPL(gmap_mprotect_notify); |
605 | rc = addr; | 935 | |
936 | /** | ||
937 | * gmap_read_table - get an unsigned long value from a guest page table using | ||
938 | * absolute addressing, without marking the page referenced. | ||
939 | * @gmap: pointer to guest mapping meta data structure | ||
940 | * @gaddr: virtual address in the guest address space | ||
941 | * @val: pointer to the unsigned long value to return | ||
942 | * | ||
943 | * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT | ||
944 | * if reading using the virtual address failed. | ||
945 | * | ||
946 | * Called with gmap->mm->mmap_sem in read. | ||
947 | */ | ||
948 | int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) | ||
949 | { | ||
950 | unsigned long address, vmaddr; | ||
951 | spinlock_t *ptl; | ||
952 | pte_t *ptep, pte; | ||
953 | int rc; | ||
954 | |||
955 | while (1) { | ||
956 | rc = -EAGAIN; | ||
957 | ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); | ||
958 | if (ptep) { | ||
959 | pte = *ptep; | ||
960 | if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { | ||
961 | address = pte_val(pte) & PAGE_MASK; | ||
962 | address += gaddr & ~PAGE_MASK; | ||
963 | *val = *(unsigned long *) address; | ||
964 | pte_val(*ptep) |= _PAGE_YOUNG; | ||
965 | /* Do *NOT* clear the _PAGE_INVALID bit! */ | ||
966 | rc = 0; | ||
967 | } | ||
968 | gmap_pte_op_end(ptl); | ||
969 | } | ||
970 | if (!rc) | ||
971 | break; | ||
972 | vmaddr = __gmap_translate(gmap, gaddr); | ||
973 | if (IS_ERR_VALUE(vmaddr)) { | ||
974 | rc = vmaddr; | ||
606 | break; | 975 | break; |
607 | } | 976 | } |
608 | /* Get the page mapped */ | 977 | rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ); |
609 | if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, | 978 | if (rc) |
610 | &unlocked)) { | ||
611 | rc = -EFAULT; | ||
612 | break; | 979 | break; |
980 | } | ||
981 | return rc; | ||
982 | } | ||
983 | EXPORT_SYMBOL_GPL(gmap_read_table); | ||
984 | |||
985 | /** | ||
986 | * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree | ||
987 | * @sg: pointer to the shadow guest address space structure | ||
988 | * @vmaddr: vm address associated with the rmap | ||
989 | * @rmap: pointer to the rmap structure | ||
990 | * | ||
991 | * Called with the sg->guest_table_lock | ||
992 | */ | ||
993 | static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, | ||
994 | struct gmap_rmap *rmap) | ||
995 | { | ||
996 | void **slot; | ||
997 | |||
998 | BUG_ON(!gmap_is_shadow(sg)); | ||
999 | slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); | ||
1000 | if (slot) { | ||
1001 | rmap->next = radix_tree_deref_slot_protected(slot, | ||
1002 | &sg->guest_table_lock); | ||
1003 | radix_tree_replace_slot(slot, rmap); | ||
1004 | } else { | ||
1005 | rmap->next = NULL; | ||
1006 | radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT, | ||
1007 | rmap); | ||
1008 | } | ||
1009 | } | ||
1010 | |||
1011 | /** | ||
1012 | * gmap_protect_rmap - modify access rights to memory and create an rmap | ||
1013 | * @sg: pointer to the shadow guest address space structure | ||
1014 | * @raddr: rmap address in the shadow gmap | ||
1015 | * @paddr: address in the parent guest address space | ||
1016 | * @len: length of the memory area to protect | ||
1017 | * @prot: indicates access rights: none, read-only or read-write | ||
1018 | * | ||
1019 | * Returns 0 if successfully protected and the rmap was created, -ENOMEM | ||
1020 | * if out of memory and -EFAULT if paddr is invalid. | ||
1021 | */ | ||
1022 | static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, | ||
1023 | unsigned long paddr, unsigned long len, int prot) | ||
1024 | { | ||
1025 | struct gmap *parent; | ||
1026 | struct gmap_rmap *rmap; | ||
1027 | unsigned long vmaddr; | ||
1028 | spinlock_t *ptl; | ||
1029 | pte_t *ptep; | ||
1030 | int rc; | ||
1031 | |||
1032 | BUG_ON(!gmap_is_shadow(sg)); | ||
1033 | parent = sg->parent; | ||
1034 | while (len) { | ||
1035 | vmaddr = __gmap_translate(parent, paddr); | ||
1036 | if (IS_ERR_VALUE(vmaddr)) | ||
1037 | return vmaddr; | ||
1038 | rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); | ||
1039 | if (!rmap) | ||
1040 | return -ENOMEM; | ||
1041 | rmap->raddr = raddr; | ||
1042 | rc = radix_tree_preload(GFP_KERNEL); | ||
1043 | if (rc) { | ||
1044 | kfree(rmap); | ||
1045 | return rc; | ||
1046 | } | ||
1047 | rc = -EAGAIN; | ||
1048 | ptep = gmap_pte_op_walk(parent, paddr, &ptl); | ||
1049 | if (ptep) { | ||
1050 | spin_lock(&sg->guest_table_lock); | ||
1051 | rc = ptep_force_prot(parent->mm, paddr, ptep, prot, | ||
1052 | PGSTE_VSIE_BIT); | ||
1053 | if (!rc) | ||
1054 | gmap_insert_rmap(sg, vmaddr, rmap); | ||
1055 | spin_unlock(&sg->guest_table_lock); | ||
1056 | gmap_pte_op_end(ptl); | ||
613 | } | 1057 | } |
614 | /* While trying to map mmap_sem got unlocked. Let us retry */ | 1058 | radix_tree_preload_end(); |
615 | if (unlocked) | 1059 | if (rc) { |
1060 | kfree(rmap); | ||
1061 | rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot); | ||
1062 | if (rc) | ||
1063 | return rc; | ||
616 | continue; | 1064 | continue; |
617 | rc = __gmap_link(gmap, gaddr, addr); | 1065 | } |
1066 | paddr += PAGE_SIZE; | ||
1067 | len -= PAGE_SIZE; | ||
1068 | } | ||
1069 | return 0; | ||
1070 | } | ||
1071 | |||
1072 | #define _SHADOW_RMAP_MASK 0x7 | ||
1073 | #define _SHADOW_RMAP_REGION1 0x5 | ||
1074 | #define _SHADOW_RMAP_REGION2 0x4 | ||
1075 | #define _SHADOW_RMAP_REGION3 0x3 | ||
1076 | #define _SHADOW_RMAP_SEGMENT 0x2 | ||
1077 | #define _SHADOW_RMAP_PGTABLE 0x1 | ||
1078 | |||
1079 | /** | ||
1080 | * gmap_idte_one - invalidate a single region or segment table entry | ||
1081 | * @asce: region or segment table *origin* + table-type bits | ||
1082 | * @vaddr: virtual address to identify the table entry to flush | ||
1083 | * | ||
1084 | * The invalid bit of a single region or segment table entry is set | ||
1085 | * and the associated TLB entries depending on the entry are flushed. | ||
1086 | * The table-type of the @asce identifies the portion of the @vaddr | ||
1087 | * that is used as the invalidation index. | ||
1088 | */ | ||
1089 | static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) | ||
1090 | { | ||
1091 | asm volatile( | ||
1092 | " .insn rrf,0xb98e0000,%0,%1,0,0" | ||
1093 | : : "a" (asce), "a" (vaddr) : "cc", "memory"); | ||
1094 | } | ||
1095 | |||
1096 | /** | ||
1097 | * gmap_unshadow_page - remove a page from a shadow page table | ||
1098 | * @sg: pointer to the shadow guest address space structure | ||
1099 | * @raddr: rmap address in the shadow guest address space | ||
1100 | * | ||
1101 | * Called with the sg->guest_table_lock | ||
1102 | */ | ||
1103 | static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) | ||
1104 | { | ||
1105 | unsigned long *table; | ||
1106 | |||
1107 | BUG_ON(!gmap_is_shadow(sg)); | ||
1108 | table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ | ||
1109 | if (!table || *table & _PAGE_INVALID) | ||
1110 | return; | ||
1111 | gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); | ||
1112 | ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); | ||
1113 | } | ||
1114 | |||
1115 | /** | ||
1116 | * __gmap_unshadow_pgt - remove all entries from a shadow page table | ||
1117 | * @sg: pointer to the shadow guest address space structure | ||
1118 | * @raddr: rmap address in the shadow guest address space | ||
1119 | * @pgt: pointer to the start of a shadow page table | ||
1120 | * | ||
1121 | * Called with the sg->guest_table_lock | ||
1122 | */ | ||
1123 | static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, | ||
1124 | unsigned long *pgt) | ||
1125 | { | ||
1126 | int i; | ||
1127 | |||
1128 | BUG_ON(!gmap_is_shadow(sg)); | ||
1129 | for (i = 0; i < 256; i++, raddr += 1UL << 12) | ||
1130 | pgt[i] = _PAGE_INVALID; | ||
1131 | } | ||
1132 | |||
1133 | /** | ||
1134 | * gmap_unshadow_pgt - remove a shadow page table from a segment entry | ||
1135 | * @sg: pointer to the shadow guest address space structure | ||
1136 | * @raddr: address in the shadow guest address space | ||
1137 | * | ||
1138 | * Called with the sg->guest_table_lock | ||
1139 | */ | ||
1140 | static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) | ||
1141 | { | ||
1142 | unsigned long sto, *ste, *pgt; | ||
1143 | struct page *page; | ||
1144 | |||
1145 | BUG_ON(!gmap_is_shadow(sg)); | ||
1146 | ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ | ||
1147 | if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) | ||
1148 | return; | ||
1149 | gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); | ||
1150 | sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); | ||
1151 | gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); | ||
1152 | pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); | ||
1153 | *ste = _SEGMENT_ENTRY_EMPTY; | ||
1154 | __gmap_unshadow_pgt(sg, raddr, pgt); | ||
1155 | /* Free page table */ | ||
1156 | page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); | ||
1157 | list_del(&page->lru); | ||
1158 | page_table_free_pgste(page); | ||
1159 | } | ||
1160 | |||
1161 | /** | ||
1162 | * __gmap_unshadow_sgt - remove all entries from a shadow segment table | ||
1163 | * @sg: pointer to the shadow guest address space structure | ||
1164 | * @raddr: rmap address in the shadow guest address space | ||
1165 | * @sgt: pointer to the start of a shadow segment table | ||
1166 | * | ||
1167 | * Called with the sg->guest_table_lock | ||
1168 | */ | ||
1169 | static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, | ||
1170 | unsigned long *sgt) | ||
1171 | { | ||
1172 | unsigned long asce, *pgt; | ||
1173 | struct page *page; | ||
1174 | int i; | ||
1175 | |||
1176 | BUG_ON(!gmap_is_shadow(sg)); | ||
1177 | asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; | ||
1178 | for (i = 0; i < 2048; i++, raddr += 1UL << 20) { | ||
1179 | if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) | ||
1180 | continue; | ||
1181 | pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); | ||
1182 | sgt[i] = _SEGMENT_ENTRY_EMPTY; | ||
1183 | __gmap_unshadow_pgt(sg, raddr, pgt); | ||
1184 | /* Free page table */ | ||
1185 | page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); | ||
1186 | list_del(&page->lru); | ||
1187 | page_table_free_pgste(page); | ||
1188 | } | ||
1189 | } | ||
1190 | |||
1191 | /** | ||
1192 | * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry | ||
1193 | * @sg: pointer to the shadow guest address space structure | ||
1194 | * @raddr: rmap address in the shadow guest address space | ||
1195 | * | ||
1196 | * Called with the shadow->guest_table_lock | ||
1197 | */ | ||
1198 | static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) | ||
1199 | { | ||
1200 | unsigned long r3o, *r3e, *sgt; | ||
1201 | struct page *page; | ||
1202 | |||
1203 | BUG_ON(!gmap_is_shadow(sg)); | ||
1204 | r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ | ||
1205 | if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) | ||
1206 | return; | ||
1207 | gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); | ||
1208 | r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); | ||
1209 | gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); | ||
1210 | sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); | ||
1211 | *r3e = _REGION3_ENTRY_EMPTY; | ||
1212 | __gmap_unshadow_sgt(sg, raddr, sgt); | ||
1213 | /* Free segment table */ | ||
1214 | page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); | ||
1215 | list_del(&page->lru); | ||
1216 | __free_pages(page, 2); | ||
1217 | } | ||
1218 | |||
1219 | /** | ||
1220 | * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table | ||
1221 | * @sg: pointer to the shadow guest address space structure | ||
1222 | * @raddr: address in the shadow guest address space | ||
1223 | * @r3t: pointer to the start of a shadow region-3 table | ||
1224 | * | ||
1225 | * Called with the sg->guest_table_lock | ||
1226 | */ | ||
1227 | static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, | ||
1228 | unsigned long *r3t) | ||
1229 | { | ||
1230 | unsigned long asce, *sgt; | ||
1231 | struct page *page; | ||
1232 | int i; | ||
1233 | |||
1234 | BUG_ON(!gmap_is_shadow(sg)); | ||
1235 | asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; | ||
1236 | for (i = 0; i < 2048; i++, raddr += 1UL << 31) { | ||
1237 | if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) | ||
1238 | continue; | ||
1239 | sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); | ||
1240 | r3t[i] = _REGION3_ENTRY_EMPTY; | ||
1241 | __gmap_unshadow_sgt(sg, raddr, sgt); | ||
1242 | /* Free segment table */ | ||
1243 | page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); | ||
1244 | list_del(&page->lru); | ||
1245 | __free_pages(page, 2); | ||
1246 | } | ||
1247 | } | ||
1248 | |||
1249 | /** | ||
1250 | * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry | ||
1251 | * @sg: pointer to the shadow guest address space structure | ||
1252 | * @raddr: rmap address in the shadow guest address space | ||
1253 | * | ||
1254 | * Called with the sg->guest_table_lock | ||
1255 | */ | ||
1256 | static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) | ||
1257 | { | ||
1258 | unsigned long r2o, *r2e, *r3t; | ||
1259 | struct page *page; | ||
1260 | |||
1261 | BUG_ON(!gmap_is_shadow(sg)); | ||
1262 | r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ | ||
1263 | if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) | ||
1264 | return; | ||
1265 | gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); | ||
1266 | r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); | ||
1267 | gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); | ||
1268 | r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); | ||
1269 | *r2e = _REGION2_ENTRY_EMPTY; | ||
1270 | __gmap_unshadow_r3t(sg, raddr, r3t); | ||
1271 | /* Free region 3 table */ | ||
1272 | page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); | ||
1273 | list_del(&page->lru); | ||
1274 | __free_pages(page, 2); | ||
1275 | } | ||
1276 | |||
1277 | /** | ||
1278 | * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table | ||
1279 | * @sg: pointer to the shadow guest address space structure | ||
1280 | * @raddr: rmap address in the shadow guest address space | ||
1281 | * @r2t: pointer to the start of a shadow region-2 table | ||
1282 | * | ||
1283 | * Called with the sg->guest_table_lock | ||
1284 | */ | ||
1285 | static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, | ||
1286 | unsigned long *r2t) | ||
1287 | { | ||
1288 | unsigned long asce, *r3t; | ||
1289 | struct page *page; | ||
1290 | int i; | ||
1291 | |||
1292 | BUG_ON(!gmap_is_shadow(sg)); | ||
1293 | asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; | ||
1294 | for (i = 0; i < 2048; i++, raddr += 1UL << 42) { | ||
1295 | if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) | ||
1296 | continue; | ||
1297 | r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); | ||
1298 | r2t[i] = _REGION2_ENTRY_EMPTY; | ||
1299 | __gmap_unshadow_r3t(sg, raddr, r3t); | ||
1300 | /* Free region 3 table */ | ||
1301 | page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); | ||
1302 | list_del(&page->lru); | ||
1303 | __free_pages(page, 2); | ||
1304 | } | ||
1305 | } | ||
1306 | |||
1307 | /** | ||
1308 | * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry | ||
1309 | * @sg: pointer to the shadow guest address space structure | ||
1310 | * @raddr: rmap address in the shadow guest address space | ||
1311 | * | ||
1312 | * Called with the sg->guest_table_lock | ||
1313 | */ | ||
1314 | static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) | ||
1315 | { | ||
1316 | unsigned long r1o, *r1e, *r2t; | ||
1317 | struct page *page; | ||
1318 | |||
1319 | BUG_ON(!gmap_is_shadow(sg)); | ||
1320 | r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ | ||
1321 | if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) | ||
1322 | return; | ||
1323 | gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); | ||
1324 | r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); | ||
1325 | gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); | ||
1326 | r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); | ||
1327 | *r1e = _REGION1_ENTRY_EMPTY; | ||
1328 | __gmap_unshadow_r2t(sg, raddr, r2t); | ||
1329 | /* Free region 2 table */ | ||
1330 | page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); | ||
1331 | list_del(&page->lru); | ||
1332 | __free_pages(page, 2); | ||
1333 | } | ||
1334 | |||
1335 | /** | ||
1336 | * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table | ||
1337 | * @sg: pointer to the shadow guest address space structure | ||
1338 | * @raddr: rmap address in the shadow guest address space | ||
1339 | * @r1t: pointer to the start of a shadow region-1 table | ||
1340 | * | ||
1341 | * Called with the shadow->guest_table_lock | ||
1342 | */ | ||
1343 | static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, | ||
1344 | unsigned long *r1t) | ||
1345 | { | ||
1346 | unsigned long asce, *r2t; | ||
1347 | struct page *page; | ||
1348 | int i; | ||
1349 | |||
1350 | BUG_ON(!gmap_is_shadow(sg)); | ||
1351 | asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; | ||
1352 | for (i = 0; i < 2048; i++, raddr += 1UL << 53) { | ||
1353 | if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) | ||
1354 | continue; | ||
1355 | r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); | ||
1356 | __gmap_unshadow_r2t(sg, raddr, r2t); | ||
1357 | /* Clear entry and flush translation r1t -> r2t */ | ||
1358 | gmap_idte_one(asce, raddr); | ||
1359 | r1t[i] = _REGION1_ENTRY_EMPTY; | ||
1360 | /* Free region 2 table */ | ||
1361 | page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); | ||
1362 | list_del(&page->lru); | ||
1363 | __free_pages(page, 2); | ||
1364 | } | ||
1365 | } | ||
1366 | |||
1367 | /** | ||
1368 | * gmap_unshadow - remove a shadow page table completely | ||
1369 | * @sg: pointer to the shadow guest address space structure | ||
1370 | * | ||
1371 | * Called with sg->guest_table_lock | ||
1372 | */ | ||
1373 | static void gmap_unshadow(struct gmap *sg) | ||
1374 | { | ||
1375 | unsigned long *table; | ||
1376 | |||
1377 | BUG_ON(!gmap_is_shadow(sg)); | ||
1378 | if (sg->removed) | ||
1379 | return; | ||
1380 | sg->removed = 1; | ||
1381 | gmap_call_notifier(sg, 0, -1UL); | ||
1382 | gmap_flush_tlb(sg); | ||
1383 | table = (unsigned long *)(sg->asce & _ASCE_ORIGIN); | ||
1384 | switch (sg->asce & _ASCE_TYPE_MASK) { | ||
1385 | case _ASCE_TYPE_REGION1: | ||
1386 | __gmap_unshadow_r1t(sg, 0, table); | ||
1387 | break; | ||
1388 | case _ASCE_TYPE_REGION2: | ||
1389 | __gmap_unshadow_r2t(sg, 0, table); | ||
1390 | break; | ||
1391 | case _ASCE_TYPE_REGION3: | ||
1392 | __gmap_unshadow_r3t(sg, 0, table); | ||
1393 | break; | ||
1394 | case _ASCE_TYPE_SEGMENT: | ||
1395 | __gmap_unshadow_sgt(sg, 0, table); | ||
1396 | break; | ||
1397 | } | ||
1398 | } | ||
1399 | |||
1400 | /** | ||
1401 | * gmap_find_shadow - find a specific asce in the list of shadow tables | ||
1402 | * @parent: pointer to the parent gmap | ||
1403 | * @asce: ASCE for which the shadow table is created | ||
1404 | * @edat_level: edat level to be used for the shadow translation | ||
1405 | * | ||
1406 | * Returns the pointer to a gmap if a shadow table with the given asce is | ||
1407 | * already available, ERR_PTR(-EAGAIN) if another one is just being created, | ||
1408 | * otherwise NULL | ||
1409 | */ | ||
1410 | static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, | ||
1411 | int edat_level) | ||
1412 | { | ||
1413 | struct gmap *sg; | ||
1414 | |||
1415 | list_for_each_entry(sg, &parent->children, list) { | ||
1416 | if (sg->orig_asce != asce || sg->edat_level != edat_level || | ||
1417 | sg->removed) | ||
1418 | continue; | ||
1419 | if (!sg->initialized) | ||
1420 | return ERR_PTR(-EAGAIN); | ||
1421 | atomic_inc(&sg->ref_count); | ||
1422 | return sg; | ||
1423 | } | ||
1424 | return NULL; | ||
1425 | } | ||
1426 | |||
1427 | /** | ||
1428 | * gmap_shadow_valid - check if a shadow guest address space matches the | ||
1429 | * given properties and is still valid | ||
1430 | * @sg: pointer to the shadow guest address space structure | ||
1431 | * @asce: ASCE for which the shadow table is requested | ||
1432 | * @edat_level: edat level to be used for the shadow translation | ||
1433 | * | ||
1434 | * Returns 1 if the gmap shadow is still valid and matches the given | ||
1435 | * properties, the caller can continue using it. Returns 0 otherwise, the | ||
1436 | * caller has to request a new shadow gmap in this case. | ||
1437 | * | ||
1438 | */ | ||
1439 | int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) | ||
1440 | { | ||
1441 | if (sg->removed) | ||
1442 | return 0; | ||
1443 | return sg->orig_asce == asce && sg->edat_level == edat_level; | ||
1444 | } | ||
1445 | EXPORT_SYMBOL_GPL(gmap_shadow_valid); | ||
1446 | |||
1447 | /** | ||
1448 | * gmap_shadow - create/find a shadow guest address space | ||
1449 | * @parent: pointer to the parent gmap | ||
1450 | * @asce: ASCE for which the shadow table is created | ||
1451 | * @edat_level: edat level to be used for the shadow translation | ||
1452 | * | ||
1453 | * The pages of the top level page table referred by the asce parameter | ||
1454 | * will be set to read-only and marked in the PGSTEs of the kvm process. | ||
1455 | * The shadow table will be removed automatically on any change to the | ||
1456 | * PTE mapping for the source table. | ||
1457 | * | ||
1458 | * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, | ||
1459 | * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the | ||
1460 | * parent gmap table could not be protected. | ||
1461 | */ | ||
1462 | struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, | ||
1463 | int edat_level) | ||
1464 | { | ||
1465 | struct gmap *sg, *new; | ||
1466 | unsigned long limit; | ||
1467 | int rc; | ||
1468 | |||
1469 | BUG_ON(gmap_is_shadow(parent)); | ||
1470 | spin_lock(&parent->shadow_lock); | ||
1471 | sg = gmap_find_shadow(parent, asce, edat_level); | ||
1472 | spin_unlock(&parent->shadow_lock); | ||
1473 | if (sg) | ||
1474 | return sg; | ||
1475 | /* Create a new shadow gmap */ | ||
1476 | limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); | ||
1477 | if (asce & _ASCE_REAL_SPACE) | ||
1478 | limit = -1UL; | ||
1479 | new = gmap_alloc(limit); | ||
1480 | if (!new) | ||
1481 | return ERR_PTR(-ENOMEM); | ||
1482 | new->mm = parent->mm; | ||
1483 | new->parent = gmap_get(parent); | ||
1484 | new->orig_asce = asce; | ||
1485 | new->edat_level = edat_level; | ||
1486 | new->initialized = false; | ||
1487 | spin_lock(&parent->shadow_lock); | ||
1488 | /* Recheck if another CPU created the same shadow */ | ||
1489 | sg = gmap_find_shadow(parent, asce, edat_level); | ||
1490 | if (sg) { | ||
1491 | spin_unlock(&parent->shadow_lock); | ||
1492 | gmap_free(new); | ||
1493 | return sg; | ||
1494 | } | ||
1495 | if (asce & _ASCE_REAL_SPACE) { | ||
1496 | /* only allow one real-space gmap shadow */ | ||
1497 | list_for_each_entry(sg, &parent->children, list) { | ||
1498 | if (sg->orig_asce & _ASCE_REAL_SPACE) { | ||
1499 | spin_lock(&sg->guest_table_lock); | ||
1500 | gmap_unshadow(sg); | ||
1501 | spin_unlock(&sg->guest_table_lock); | ||
1502 | list_del(&sg->list); | ||
1503 | gmap_put(sg); | ||
1504 | break; | ||
1505 | } | ||
1506 | } | ||
1507 | } | ||
1508 | atomic_set(&new->ref_count, 2); | ||
1509 | list_add(&new->list, &parent->children); | ||
1510 | if (asce & _ASCE_REAL_SPACE) { | ||
1511 | /* nothing to protect, return right away */ | ||
1512 | new->initialized = true; | ||
1513 | spin_unlock(&parent->shadow_lock); | ||
1514 | return new; | ||
1515 | } | ||
1516 | spin_unlock(&parent->shadow_lock); | ||
1517 | /* protect after insertion, so it will get properly invalidated */ | ||
1518 | down_read(&parent->mm->mmap_sem); | ||
1519 | rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, | ||
1520 | ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, | ||
1521 | PROT_READ, PGSTE_VSIE_BIT); | ||
1522 | up_read(&parent->mm->mmap_sem); | ||
1523 | spin_lock(&parent->shadow_lock); | ||
1524 | new->initialized = true; | ||
1525 | if (rc) { | ||
1526 | list_del(&new->list); | ||
1527 | gmap_free(new); | ||
1528 | new = ERR_PTR(rc); | ||
1529 | } | ||
1530 | spin_unlock(&parent->shadow_lock); | ||
1531 | return new; | ||
1532 | } | ||
1533 | EXPORT_SYMBOL_GPL(gmap_shadow); | ||
1534 | |||
1535 | /** | ||
1536 | * gmap_shadow_r2t - create an empty shadow region 2 table | ||
1537 | * @sg: pointer to the shadow guest address space structure | ||
1538 | * @saddr: faulting address in the shadow gmap | ||
1539 | * @r2t: parent gmap address of the region 2 table to get shadowed | ||
1540 | * @fake: r2t references contiguous guest memory block, not a r2t | ||
1541 | * | ||
1542 | * The r2t parameter specifies the address of the source table. The | ||
1543 | * four pages of the source table are made read-only in the parent gmap | ||
1544 | * address space. A write to the source table area @r2t will automatically | ||
1545 | * remove the shadow r2 table and all of its decendents. | ||
1546 | * | ||
1547 | * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the | ||
1548 | * shadow table structure is incomplete, -ENOMEM if out of memory and | ||
1549 | * -EFAULT if an address in the parent gmap could not be resolved. | ||
1550 | * | ||
1551 | * Called with sg->mm->mmap_sem in read. | ||
1552 | */ | ||
1553 | int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, | ||
1554 | int fake) | ||
1555 | { | ||
1556 | unsigned long raddr, origin, offset, len; | ||
1557 | unsigned long *s_r2t, *table; | ||
1558 | struct page *page; | ||
1559 | int rc; | ||
1560 | |||
1561 | BUG_ON(!gmap_is_shadow(sg)); | ||
1562 | /* Allocate a shadow region second table */ | ||
1563 | page = alloc_pages(GFP_KERNEL, 2); | ||
1564 | if (!page) | ||
1565 | return -ENOMEM; | ||
1566 | page->index = r2t & _REGION_ENTRY_ORIGIN; | ||
1567 | if (fake) | ||
1568 | page->index |= GMAP_SHADOW_FAKE_TABLE; | ||
1569 | s_r2t = (unsigned long *) page_to_phys(page); | ||
1570 | /* Install shadow region second table */ | ||
1571 | spin_lock(&sg->guest_table_lock); | ||
1572 | table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ | ||
1573 | if (!table) { | ||
1574 | rc = -EAGAIN; /* Race with unshadow */ | ||
1575 | goto out_free; | ||
1576 | } | ||
1577 | if (!(*table & _REGION_ENTRY_INVALID)) { | ||
1578 | rc = 0; /* Already established */ | ||
1579 | goto out_free; | ||
1580 | } else if (*table & _REGION_ENTRY_ORIGIN) { | ||
1581 | rc = -EAGAIN; /* Race with shadow */ | ||
1582 | goto out_free; | ||
1583 | } | ||
1584 | crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY); | ||
1585 | /* mark as invalid as long as the parent table is not protected */ | ||
1586 | *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH | | ||
1587 | _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; | ||
1588 | if (sg->edat_level >= 1) | ||
1589 | *table |= (r2t & _REGION_ENTRY_PROTECT); | ||
1590 | list_add(&page->lru, &sg->crst_list); | ||
1591 | if (fake) { | ||
1592 | /* nothing to protect for fake tables */ | ||
1593 | *table &= ~_REGION_ENTRY_INVALID; | ||
1594 | spin_unlock(&sg->guest_table_lock); | ||
1595 | return 0; | ||
1596 | } | ||
1597 | spin_unlock(&sg->guest_table_lock); | ||
1598 | /* Make r2t read-only in parent gmap page table */ | ||
1599 | raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; | ||
1600 | origin = r2t & _REGION_ENTRY_ORIGIN; | ||
1601 | offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; | ||
1602 | len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; | ||
1603 | rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); | ||
1604 | spin_lock(&sg->guest_table_lock); | ||
1605 | if (!rc) { | ||
1606 | table = gmap_table_walk(sg, saddr, 4); | ||
1607 | if (!table || (*table & _REGION_ENTRY_ORIGIN) != | ||
1608 | (unsigned long) s_r2t) | ||
1609 | rc = -EAGAIN; /* Race with unshadow */ | ||
1610 | else | ||
1611 | *table &= ~_REGION_ENTRY_INVALID; | ||
1612 | } else { | ||
1613 | gmap_unshadow_r2t(sg, raddr); | ||
1614 | } | ||
1615 | spin_unlock(&sg->guest_table_lock); | ||
1616 | return rc; | ||
1617 | out_free: | ||
1618 | spin_unlock(&sg->guest_table_lock); | ||
1619 | __free_pages(page, 2); | ||
1620 | return rc; | ||
1621 | } | ||
1622 | EXPORT_SYMBOL_GPL(gmap_shadow_r2t); | ||
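The same allocate/install/protect pattern repeats for the lower table levels below (r3t, sgt, pgt). From the caller's point of view the contract is the return code: 0 means the shadow entry is usable, -EAGAIN means a concurrent shadow/unshadow won and the guest table walk has to be redone. The following is a minimal, hedged sketch of such a caller; the helper name shadow_r2t_retry and the bounded retry loop are illustrative assumptions, not part of this patch (a real caller such as the vSIE fault path would typically restart its complete translation instead).

#include <linux/errno.h>
#include <linux/mm.h>
#include <asm/gmap.h>

/*
 * Illustrative sketch only: install a shadow region-2 table for @saddr.
 * @r2t is assumed to be the region-first-table entry already read from
 * guest memory by the caller; @fake is nonzero when that entry really
 * describes a contiguous block of guest memory (EDAT).
 */
static int shadow_r2t_retry(struct gmap *sg, unsigned long saddr,
			    unsigned long r2t, int fake)
{
	int retries = 5;
	int rc;

	do {
		down_read(&sg->mm->mmap_sem);
		rc = gmap_shadow_r2t(sg, saddr, r2t, fake);
		up_read(&sg->mm->mmap_sem);
	} while (rc == -EAGAIN && --retries);

	return rc;
}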
1623 | |||
1624 | /** | ||
1625 | * gmap_shadow_r3t - create a shadow region 3 table | ||
1626 | * @sg: pointer to the shadow guest address space structure | ||
1627 | * @saddr: faulting address in the shadow gmap | ||
1628 | * @r3t: parent gmap address of the region 3 table to get shadowed | ||
1629 | * @fake: r3t references contiguous guest memory block, not a r3t | ||
1630 | * | ||
1631 | * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the | ||
1632 | * shadow table structure is incomplete, -ENOMEM if out of memory and | ||
1633 | * -EFAULT if an address in the parent gmap could not be resolved. | ||
1634 | * | ||
1635 | * Called with sg->mm->mmap_sem in read. | ||
1636 | */ | ||
1637 | int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, | ||
1638 | int fake) | ||
1639 | { | ||
1640 | unsigned long raddr, origin, offset, len; | ||
1641 | unsigned long *s_r3t, *table; | ||
1642 | struct page *page; | ||
1643 | int rc; | ||
1644 | |||
1645 | BUG_ON(!gmap_is_shadow(sg)); | ||
1646 | /* Allocate a shadow region third table */ | ||
1647 | page = alloc_pages(GFP_KERNEL, 2); | ||
1648 | if (!page) | ||
1649 | return -ENOMEM; | ||
1650 | page->index = r3t & _REGION_ENTRY_ORIGIN; | ||
1651 | if (fake) | ||
1652 | page->index |= GMAP_SHADOW_FAKE_TABLE; | ||
1653 | s_r3t = (unsigned long *) page_to_phys(page); | ||
1654 | /* Install shadow region third table */ | ||
1655 | spin_lock(&sg->guest_table_lock); | ||
1656 | table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */ | ||
1657 | if (!table) { | ||
1658 | rc = -EAGAIN; /* Race with unshadow */ | ||
1659 | goto out_free; | ||
1660 | } | ||
1661 | if (!(*table & _REGION_ENTRY_INVALID)) { | ||
1662 | rc = 0; /* Already established */ | ||
1663 | goto out_free; | ||
1664 | } else if (*table & _REGION_ENTRY_ORIGIN) { | ||
1665 | rc = -EAGAIN; goto out_free; /* Race with shadow */ | ||
1666 | } | ||
1667 | crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); | ||
1668 | /* mark as invalid as long as the parent table is not protected */ | ||
1669 | *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH | | ||
1670 | _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; | ||
1671 | if (sg->edat_level >= 1) | ||
1672 | *table |= (r3t & _REGION_ENTRY_PROTECT); | ||
1673 | list_add(&page->lru, &sg->crst_list); | ||
1674 | if (fake) { | ||
1675 | /* nothing to protect for fake tables */ | ||
1676 | *table &= ~_REGION_ENTRY_INVALID; | ||
1677 | spin_unlock(&sg->guest_table_lock); | ||
1678 | return 0; | ||
1679 | } | ||
1680 | spin_unlock(&sg->guest_table_lock); | ||
1681 | /* Make r3t read-only in parent gmap page table */ | ||
1682 | raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; | ||
1683 | origin = r3t & _REGION_ENTRY_ORIGIN; | ||
1684 | offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096; | ||
1685 | len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; | ||
1686 | rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); | ||
1687 | spin_lock(&sg->guest_table_lock); | ||
1688 | if (!rc) { | ||
1689 | table = gmap_table_walk(sg, saddr, 3); | ||
1690 | if (!table || (*table & _REGION_ENTRY_ORIGIN) != | ||
1691 | (unsigned long) s_r3t) | ||
1692 | rc = -EAGAIN; /* Race with unshadow */ | ||
1693 | else | ||
1694 | *table &= ~_REGION_ENTRY_INVALID; | ||
1695 | } else { | ||
1696 | gmap_unshadow_r3t(sg, raddr); | ||
1697 | } | ||
1698 | spin_unlock(&sg->guest_table_lock); | ||
1699 | return rc; | ||
1700 | out_free: | ||
1701 | spin_unlock(&sg->guest_table_lock); | ||
1702 | __free_pages(page, 2); | ||
1703 | return rc; | ||
1704 | } | ||
1705 | EXPORT_SYMBOL_GPL(gmap_shadow_r3t); | ||
1706 | |||
1707 | /** | ||
1708 | * gmap_shadow_sgt - create a shadow segment table | ||
1709 | * @sg: pointer to the shadow guest address space structure | ||
1710 | * @saddr: faulting address in the shadow gmap | ||
1711 | * @sgt: parent gmap address of the segment table to get shadowed | ||
1712 | * @fake: sgt references contiguous guest memory block, not a sgt | ||
1713 | * | ||
1714 | * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the | ||
1715 | * shadow table structure is incomplete, -ENOMEM if out of memory and | ||
1716 | * -EFAULT if an address in the parent gmap could not be resolved. | ||
1717 | * | ||
1718 | * Called with sg->mm->mmap_sem in read. | ||
1719 | */ | ||
1720 | int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, | ||
1721 | int fake) | ||
1722 | { | ||
1723 | unsigned long raddr, origin, offset, len; | ||
1724 | unsigned long *s_sgt, *table; | ||
1725 | struct page *page; | ||
1726 | int rc; | ||
1727 | |||
1728 | BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); | ||
1729 | /* Allocate a shadow segment table */ | ||
1730 | page = alloc_pages(GFP_KERNEL, 2); | ||
1731 | if (!page) | ||
1732 | return -ENOMEM; | ||
1733 | page->index = sgt & _REGION_ENTRY_ORIGIN; | ||
1734 | if (fake) | ||
1735 | page->index |= GMAP_SHADOW_FAKE_TABLE; | ||
1736 | s_sgt = (unsigned long *) page_to_phys(page); | ||
1737 | /* Install shadow segment table */ | ||
1738 | spin_lock(&sg->guest_table_lock); | ||
1739 | table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ | ||
1740 | if (!table) { | ||
1741 | rc = -EAGAIN; /* Race with unshadow */ | ||
1742 | goto out_free; | ||
1743 | } | ||
1744 | if (!(*table & _REGION_ENTRY_INVALID)) { | ||
1745 | rc = 0; /* Already established */ | ||
1746 | goto out_free; | ||
1747 | } else if (*table & _REGION_ENTRY_ORIGIN) { | ||
1748 | rc = -EAGAIN; /* Race with shadow */ | ||
1749 | goto out_free; | ||
1750 | } | ||
1751 | crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY); | ||
1752 | /* mark as invalid as long as the parent table is not protected */ | ||
1753 | *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH | | ||
1754 | _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; | ||
1755 | if (sg->edat_level >= 1) | ||
1756 | *table |= sgt & _REGION_ENTRY_PROTECT; | ||
1757 | list_add(&page->lru, &sg->crst_list); | ||
1758 | if (fake) { | ||
1759 | /* nothing to protect for fake tables */ | ||
1760 | *table &= ~_REGION_ENTRY_INVALID; | ||
1761 | spin_unlock(&sg->guest_table_lock); | ||
1762 | return 0; | ||
1763 | } | ||
1764 | spin_unlock(&sg->guest_table_lock); | ||
1765 | /* Make sgt read-only in parent gmap page table */ | ||
1766 | raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; | ||
1767 | origin = sgt & _REGION_ENTRY_ORIGIN; | ||
1768 | offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; | ||
1769 | len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; | ||
1770 | rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); | ||
1771 | spin_lock(&sg->guest_table_lock); | ||
1772 | if (!rc) { | ||
1773 | table = gmap_table_walk(sg, saddr, 2); | ||
1774 | if (!table || (*table & _REGION_ENTRY_ORIGIN) != | ||
1775 | (unsigned long) s_sgt) | ||
1776 | rc = -EAGAIN; /* Race with unshadow */ | ||
1777 | else | ||
1778 | *table &= ~_REGION_ENTRY_INVALID; | ||
1779 | } else { | ||
1780 | gmap_unshadow_sgt(sg, raddr); | ||
1781 | } | ||
1782 | spin_unlock(&sg->guest_table_lock); | ||
1783 | return rc; | ||
1784 | out_free: | ||
1785 | spin_unlock(&sg->guest_table_lock); | ||
1786 | __free_pages(page, 2); | ||
1787 | return rc; | ||
1788 | } | ||
1789 | EXPORT_SYMBOL_GPL(gmap_shadow_sgt); | ||
1790 | |||
1791 | /** | ||
1792 | * gmap_shadow_pgt_lookup - find a shadow page table | ||
1793 | * @sg: pointer to the shadow guest address space structure | ||
1794 | * @saddr: the address in the shadow guest address space | ||
1795 | * @pgt: parent gmap address of the page table to get shadowed | ||
1796 | * @dat_protection: if the pgtable is marked as protected by dat | ||
1797 | * @fake: pgt references contiguous guest memory block, not a pgtable | ||
1798 | * | ||
1799 | * Returns 0 if the shadow page table was found and -EAGAIN if the page | ||
1800 | * table was not found. | ||
1801 | * | ||
1802 | * Called with sg->mm->mmap_sem in read. | ||
1803 | */ | ||
1804 | int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, | ||
1805 | unsigned long *pgt, int *dat_protection, | ||
1806 | int *fake) | ||
1807 | { | ||
1808 | unsigned long *table; | ||
1809 | struct page *page; | ||
1810 | int rc; | ||
1811 | |||
1812 | BUG_ON(!gmap_is_shadow(sg)); | ||
1813 | spin_lock(&sg->guest_table_lock); | ||
1814 | table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ | ||
1815 | if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { | ||
1816 | /* Shadow page tables are full pages (pte+pgste) */ | ||
1817 | page = pfn_to_page(*table >> PAGE_SHIFT); | ||
1818 | *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; | ||
1819 | *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); | ||
1820 | *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); | ||
1821 | rc = 0; | ||
1822 | } else { | ||
1823 | rc = -EAGAIN; | ||
1824 | } | ||
1825 | spin_unlock(&sg->guest_table_lock); | ||
1826 | return rc; | ||
1827 | |||
1828 | } | ||
1829 | EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); | ||
1830 | |||
1831 | /** | ||
1832 | * gmap_shadow_pgt - instantiate a shadow page table | ||
1833 | * @sg: pointer to the shadow guest address space structure | ||
1834 | * @saddr: faulting address in the shadow gmap | ||
1835 | * @pgt: parent gmap address of the page table to get shadowed | ||
1836 | * @fake: pgt references contiguous guest memory block, not a pgtable | ||
1837 | * | ||
1838 | * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the | ||
1839 | * shadow table structure is incomplete, -ENOMEM if out of memory and | ||
1840 | * -EFAULT if an address in the parent gmap could not be resolved. | ||
1841 | * | ||
1842 | * Called with sg->mm->mmap_sem in read. | ||
1843 | */ | ||
1844 | int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, | ||
1845 | int fake) | ||
1846 | { | ||
1847 | unsigned long raddr, origin; | ||
1848 | unsigned long *s_pgt, *table; | ||
1849 | struct page *page; | ||
1850 | int rc; | ||
1851 | |||
1852 | BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); | ||
1853 | /* Allocate a shadow page table */ | ||
1854 | page = page_table_alloc_pgste(sg->mm); | ||
1855 | if (!page) | ||
1856 | return -ENOMEM; | ||
1857 | page->index = pgt & _SEGMENT_ENTRY_ORIGIN; | ||
1858 | if (fake) | ||
1859 | page->index |= GMAP_SHADOW_FAKE_TABLE; | ||
1860 | s_pgt = (unsigned long *) page_to_phys(page); | ||
1861 | /* Install shadow page table */ | ||
1862 | spin_lock(&sg->guest_table_lock); | ||
1863 | table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ | ||
1864 | if (!table) { | ||
1865 | rc = -EAGAIN; /* Race with unshadow */ | ||
1866 | goto out_free; | ||
1867 | } | ||
1868 | if (!(*table & _SEGMENT_ENTRY_INVALID)) { | ||
1869 | rc = 0; /* Already established */ | ||
1870 | goto out_free; | ||
1871 | } else if (*table & _SEGMENT_ENTRY_ORIGIN) { | ||
1872 | rc = -EAGAIN; /* Race with shadow */ | ||
1873 | goto out_free; | ||
1874 | } | ||
1875 | /* mark as invalid as long as the parent table is not protected */ | ||
1876 | *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | | ||
1877 | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; | ||
1878 | list_add(&page->lru, &sg->pt_list); | ||
1879 | if (fake) { | ||
1880 | /* nothing to protect for fake tables */ | ||
1881 | *table &= ~_SEGMENT_ENTRY_INVALID; | ||
1882 | spin_unlock(&sg->guest_table_lock); | ||
1883 | return 0; | ||
1884 | } | ||
1885 | spin_unlock(&sg->guest_table_lock); | ||
1886 | /* Make pgt read-only in parent gmap page table (not the pgste) */ | ||
1887 | raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; | ||
1888 | origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; | ||
1889 | rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); | ||
1890 | spin_lock(&sg->guest_table_lock); | ||
1891 | if (!rc) { | ||
1892 | table = gmap_table_walk(sg, saddr, 1); | ||
1893 | if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != | ||
1894 | (unsigned long) s_pgt) | ||
1895 | rc = -EAGAIN; /* Race with unshadow */ | ||
1896 | else | ||
1897 | *table &= ~_SEGMENT_ENTRY_INVALID; | ||
1898 | } else { | ||
1899 | gmap_unshadow_pgt(sg, raddr); | ||
1900 | } | ||
1901 | spin_unlock(&sg->guest_table_lock); | ||
1902 | return rc; | ||
1903 | out_free: | ||
1904 | spin_unlock(&sg->guest_table_lock); | ||
1905 | page_table_free_pgste(page); | ||
1906 | return rc; | ||
1907 | |||
1908 | } | ||
1909 | EXPORT_SYMBOL_GPL(gmap_shadow_pgt); | ||
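Taken together, gmap_shadow_pgt_lookup() and gmap_shadow_pgt() give a fault handler everything it needs at the page-table level: look the shadow page table up first, and only when that fails re-walk the guest tables and instantiate it from the guest's segment-table entry. A hedged sketch of that flow follows; the helper name and the assumption that @ste was already read from the guest's segment table by the caller are illustrative, not part of this patch.

#include <linux/errno.h>
#include <asm/gmap.h>

/*
 * Illustrative sketch only: find or create the shadow page table that
 * covers @saddr.  Must be called with sg->mm->mmap_sem held in read,
 * like the gmap_shadow_* functions themselves.
 */
static int get_shadow_pgt(struct gmap *sg, unsigned long saddr,
			  unsigned long ste,
			  unsigned long *pgt, int *dat_protection, int *fake)
{
	int rc;

	rc = gmap_shadow_pgt_lookup(sg, saddr, pgt, dat_protection, fake);
	if (!rc)
		return 0;		/* already shadowed */
	/* not there yet: shadow the page table named by the guest STE */
	rc = gmap_shadow_pgt(sg, saddr, ste, 0);
	if (rc)
		return rc;		/* e.g. -EAGAIN: let the fault be retried */
	return gmap_shadow_pgt_lookup(sg, saddr, pgt, dat_protection, fake);
}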
1910 | |||
1911 | /** | ||
1912 | * gmap_shadow_page - create a shadow page mapping | ||
1913 | * @sg: pointer to the shadow guest address space structure | ||
1914 | * @saddr: faulting address in the shadow gmap | ||
1915 | * @pte: pte in parent gmap address space to get shadowed | ||
1916 | * | ||
1917 | * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the | ||
1918 | * shadow table structure is incomplete, -ENOMEM if out of memory and | ||
1919 | * -EFAULT if an address in the parent gmap could not be resolved. | ||
1920 | * | ||
1921 | * Called with sg->mm->mmap_sem in read. | ||
1922 | */ | ||
1923 | int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) | ||
1924 | { | ||
1925 | struct gmap *parent; | ||
1926 | struct gmap_rmap *rmap; | ||
1927 | unsigned long vmaddr, paddr; | ||
1928 | spinlock_t *ptl; | ||
1929 | pte_t *sptep, *tptep; | ||
1930 | int prot; | ||
1931 | int rc; | ||
1932 | |||
1933 | BUG_ON(!gmap_is_shadow(sg)); | ||
1934 | parent = sg->parent; | ||
1935 | prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE; | ||
1936 | |||
1937 | rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); | ||
1938 | if (!rmap) | ||
1939 | return -ENOMEM; | ||
1940 | rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE; | ||
1941 | |||
1942 | while (1) { | ||
1943 | paddr = pte_val(pte) & PAGE_MASK; | ||
1944 | vmaddr = __gmap_translate(parent, paddr); | ||
1945 | if (IS_ERR_VALUE(vmaddr)) { | ||
1946 | rc = vmaddr; | ||
1947 | break; | ||
1948 | } | ||
1949 | rc = radix_tree_preload(GFP_KERNEL); | ||
618 | if (rc) | 1950 | if (rc) |
619 | break; | 1951 | break; |
620 | /* Walk the process page table, lock and get pte pointer */ | 1952 | rc = -EAGAIN; |
621 | ptep = get_locked_pte(gmap->mm, addr, &ptl); | 1953 | sptep = gmap_pte_op_walk(parent, paddr, &ptl); |
622 | VM_BUG_ON(!ptep); | 1954 | if (sptep) { |
623 | /* Set notification bit in the pgste of the pte */ | 1955 | spin_lock(&sg->guest_table_lock); |
624 | if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { | 1956 | /* Get page table pointer */ |
625 | ptep_set_notify(gmap->mm, addr, ptep); | 1957 | tptep = (pte_t *) gmap_table_walk(sg, saddr, 0); |
626 | gaddr += PAGE_SIZE; | 1958 | if (!tptep) { |
627 | len -= PAGE_SIZE; | 1959 | spin_unlock(&sg->guest_table_lock); |
1960 | gmap_pte_op_end(ptl); | ||
1961 | radix_tree_preload_end(); | ||
1962 | break; | ||
1963 | } | ||
1964 | rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte); | ||
1965 | if (rc > 0) { | ||
1966 | /* Success and a new mapping */ | ||
1967 | gmap_insert_rmap(sg, vmaddr, rmap); | ||
1968 | rmap = NULL; | ||
1969 | rc = 0; | ||
1970 | } | ||
1971 | gmap_pte_op_end(ptl); | ||
1972 | spin_unlock(&sg->guest_table_lock); | ||
628 | } | 1973 | } |
629 | pte_unmap_unlock(ptep, ptl); | 1974 | radix_tree_preload_end(); |
1975 | if (!rc) | ||
1976 | break; | ||
1977 | rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot); | ||
1978 | if (rc) | ||
1979 | break; | ||
630 | } | 1980 | } |
631 | up_read(&gmap->mm->mmap_sem); | 1981 | kfree(rmap); |
632 | return rc; | 1982 | return rc; |
633 | } | 1983 | } |
634 | EXPORT_SYMBOL_GPL(gmap_ipte_notify); | 1984 | EXPORT_SYMBOL_GPL(gmap_shadow_page); |
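gmap_shadow_page() is the last step of a shadow fault: once the guest's own PTE for the faulting address is known, it resolves that PTE against the parent gmap (faulting memory in via gmap_pte_op_fixup() as needed) and installs the shadow mapping. Below is a hedged sketch of the caller side; the guest PTE value and the accumulated DAT protection are assumed to be supplied by the caller's own guest table walk.

#include <asm/gmap.h>
#include <asm/pgtable.h>

/*
 * Illustrative sketch only: turn a guest PTE value into a shadow mapping.
 * @guest_pte is assumed to have been read from the guest page table by
 * the caller; @dat_protection is the protection accumulated while walking
 * the higher-level guest tables.  Called with sg->mm->mmap_sem in read.
 */
static int map_shadow_page(struct gmap *sg, unsigned long saddr,
			   unsigned long guest_pte, int dat_protection)
{
	pte_t pte = __pte(guest_pte & PAGE_MASK);

	if (dat_protection)
		pte_val(pte) |= _PAGE_PROTECT;
	return gmap_shadow_page(sg, saddr, pte);
}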
1985 | |||
1986 | /** | ||
1987 | * gmap_shadow_notify - handle notifications for shadow gmap | ||
1988 | * | ||
1989 | * Called with sg->parent->shadow_lock held. | ||
1990 | */ | ||
1991 | static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, | ||
1992 | unsigned long offset, pte_t *pte) | ||
1993 | { | ||
1994 | struct gmap_rmap *rmap, *rnext, *head; | ||
1995 | unsigned long gaddr, start, end, bits, raddr; | ||
1996 | unsigned long *table; | ||
1997 | |||
1998 | BUG_ON(!gmap_is_shadow(sg)); | ||
1999 | spin_lock(&sg->parent->guest_table_lock); | ||
2000 | table = radix_tree_lookup(&sg->parent->host_to_guest, | ||
2001 | vmaddr >> PMD_SHIFT); | ||
2002 | gaddr = table ? __gmap_segment_gaddr(table) + offset : 0; | ||
2003 | spin_unlock(&sg->parent->guest_table_lock); | ||
2004 | if (!table) | ||
2005 | return; | ||
2006 | |||
2007 | spin_lock(&sg->guest_table_lock); | ||
2008 | if (sg->removed) { | ||
2009 | spin_unlock(&sg->guest_table_lock); | ||
2010 | return; | ||
2011 | } | ||
2012 | /* Check for top level table */ | ||
2013 | start = sg->orig_asce & _ASCE_ORIGIN; | ||
2014 | end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; | ||
2015 | if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && | ||
2016 | gaddr < end) { | ||
2017 | /* The complete shadow table has to go */ | ||
2018 | gmap_unshadow(sg); | ||
2019 | spin_unlock(&sg->guest_table_lock); | ||
2020 | list_del(&sg->list); | ||
2021 | gmap_put(sg); | ||
2022 | return; | ||
2023 | } | ||
2024 | /* Remove the page table tree from one specific entry */ | ||
2025 | head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); | ||
2026 | gmap_for_each_rmap_safe(rmap, rnext, head) { | ||
2027 | bits = rmap->raddr & _SHADOW_RMAP_MASK; | ||
2028 | raddr = rmap->raddr ^ bits; | ||
2029 | switch (bits) { | ||
2030 | case _SHADOW_RMAP_REGION1: | ||
2031 | gmap_unshadow_r2t(sg, raddr); | ||
2032 | break; | ||
2033 | case _SHADOW_RMAP_REGION2: | ||
2034 | gmap_unshadow_r3t(sg, raddr); | ||
2035 | break; | ||
2036 | case _SHADOW_RMAP_REGION3: | ||
2037 | gmap_unshadow_sgt(sg, raddr); | ||
2038 | break; | ||
2039 | case _SHADOW_RMAP_SEGMENT: | ||
2040 | gmap_unshadow_pgt(sg, raddr); | ||
2041 | break; | ||
2042 | case _SHADOW_RMAP_PGTABLE: | ||
2043 | gmap_unshadow_page(sg, raddr); | ||
2044 | break; | ||
2045 | } | ||
2046 | kfree(rmap); | ||
2047 | } | ||
2048 | spin_unlock(&sg->guest_table_lock); | ||
2049 | } | ||
635 | 2050 | ||
636 | /** | 2051 | /** |
637 | * ptep_notify - call all invalidation callbacks for a specific pte. | 2052 | * ptep_notify - call all invalidation callbacks for a specific pte. |
638 | * @mm: pointer to the process mm_struct | 2053 | * @mm: pointer to the process mm_struct |
639 | * @addr: virtual address in the process address space | 2054 | * @addr: virtual address in the process address space |
640 | * @pte: pointer to the page table entry | 2055 | * @pte: pointer to the page table entry |
2056 | * @bits: bits from the pgste that caused the notify call | ||
641 | * | 2057 | * |
642 | * This function is assumed to be called with the page table lock held | 2058 | * This function is assumed to be called with the page table lock held |
643 | * for the pte to notify. | 2059 | * for the pte to notify. |
644 | */ | 2060 | */ |
645 | void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) | 2061 | void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, |
2062 | pte_t *pte, unsigned long bits) | ||
646 | { | 2063 | { |
647 | unsigned long offset, gaddr; | 2064 | unsigned long offset, gaddr; |
648 | unsigned long *table; | 2065 | unsigned long *table; |
649 | struct gmap_notifier *nb; | 2066 | struct gmap *gmap, *sg, *next; |
650 | struct gmap *gmap; | ||
651 | 2067 | ||
652 | offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); | 2068 | offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); |
653 | offset = offset * (4096 / sizeof(pte_t)); | 2069 | offset = offset * (4096 / sizeof(pte_t)); |
654 | spin_lock(&gmap_notifier_lock); | 2070 | rcu_read_lock(); |
655 | list_for_each_entry(gmap, &mm->context.gmap_list, list) { | 2071 | list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { |
2072 | if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { | ||
2073 | spin_lock(&gmap->shadow_lock); | ||
2074 | list_for_each_entry_safe(sg, next, | ||
2075 | &gmap->children, list) | ||
2076 | gmap_shadow_notify(sg, vmaddr, offset, pte); | ||
2077 | spin_unlock(&gmap->shadow_lock); | ||
2078 | } | ||
2079 | if (!(bits & PGSTE_IN_BIT)) | ||
2080 | continue; | ||
2081 | spin_lock(&gmap->guest_table_lock); | ||
656 | table = radix_tree_lookup(&gmap->host_to_guest, | 2082 | table = radix_tree_lookup(&gmap->host_to_guest, |
657 | vmaddr >> PMD_SHIFT); | 2083 | vmaddr >> PMD_SHIFT); |
658 | if (!table) | 2084 | if (table) |
659 | continue; | 2085 | gaddr = __gmap_segment_gaddr(table) + offset; |
660 | gaddr = __gmap_segment_gaddr(table) + offset; | 2086 | spin_unlock(&gmap->guest_table_lock); |
661 | list_for_each_entry(nb, &gmap_notifier_list, list) | 2087 | if (table) |
662 | nb->notifier_call(gmap, gaddr); | 2088 | gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); |
663 | } | 2089 | } |
664 | spin_unlock(&gmap_notifier_lock); | 2090 | rcu_read_unlock(); |
665 | } | 2091 | } |
666 | EXPORT_SYMBOL_GPL(ptep_notify); | 2092 | EXPORT_SYMBOL_GPL(ptep_notify); |
667 | 2093 | ||
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e2565d2d0c32..995f78532cc2 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c | |||
@@ -137,6 +137,29 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | |||
137 | return new; | 137 | return new; |
138 | } | 138 | } |
139 | 139 | ||
140 | #ifdef CONFIG_PGSTE | ||
141 | |||
142 | struct page *page_table_alloc_pgste(struct mm_struct *mm) | ||
143 | { | ||
144 | struct page *page; | ||
145 | unsigned long *table; | ||
146 | |||
147 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | ||
148 | if (page) { | ||
149 | table = (unsigned long *) page_to_phys(page); | ||
150 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | ||
151 | clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); | ||
152 | } | ||
153 | return page; | ||
154 | } | ||
155 | |||
156 | void page_table_free_pgste(struct page *page) | ||
157 | { | ||
158 | __free_page(page); | ||
159 | } | ||
160 | |||
161 | #endif /* CONFIG_PGSTE */ | ||
162 | |||
140 | /* | 163 | /* |
141 | * page table entry allocation/free routines. | 164 | * page table entry allocation/free routines. |
142 | */ | 165 | */ |
@@ -149,7 +172,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
149 | /* Try to get a fragment of a 4K page as a 2K page table */ | 172 | /* Try to get a fragment of a 4K page as a 2K page table */ |
150 | if (!mm_alloc_pgste(mm)) { | 173 | if (!mm_alloc_pgste(mm)) { |
151 | table = NULL; | 174 | table = NULL; |
152 | spin_lock_bh(&mm->context.list_lock); | 175 | spin_lock_bh(&mm->context.pgtable_lock); |
153 | if (!list_empty(&mm->context.pgtable_list)) { | 176 | if (!list_empty(&mm->context.pgtable_list)) { |
154 | page = list_first_entry(&mm->context.pgtable_list, | 177 | page = list_first_entry(&mm->context.pgtable_list, |
155 | struct page, lru); | 178 | struct page, lru); |
@@ -164,7 +187,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
164 | list_del(&page->lru); | 187 | list_del(&page->lru); |
165 | } | 188 | } |
166 | } | 189 | } |
167 | spin_unlock_bh(&mm->context.list_lock); | 190 | spin_unlock_bh(&mm->context.pgtable_lock); |
168 | if (table) | 191 | if (table) |
169 | return table; | 192 | return table; |
170 | } | 193 | } |
@@ -187,9 +210,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
187 | /* Return the first 2K fragment of the page */ | 210 | /* Return the first 2K fragment of the page */ |
188 | atomic_set(&page->_mapcount, 1); | 211 | atomic_set(&page->_mapcount, 1); |
189 | clear_table(table, _PAGE_INVALID, PAGE_SIZE); | 212 | clear_table(table, _PAGE_INVALID, PAGE_SIZE); |
190 | spin_lock_bh(&mm->context.list_lock); | 213 | spin_lock_bh(&mm->context.pgtable_lock); |
191 | list_add(&page->lru, &mm->context.pgtable_list); | 214 | list_add(&page->lru, &mm->context.pgtable_list); |
192 | spin_unlock_bh(&mm->context.list_lock); | 215 | spin_unlock_bh(&mm->context.pgtable_lock); |
193 | } | 216 | } |
194 | return table; | 217 | return table; |
195 | } | 218 | } |
@@ -203,13 +226,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
203 | if (!mm_alloc_pgste(mm)) { | 226 | if (!mm_alloc_pgste(mm)) { |
204 | /* Free 2K page table fragment of a 4K page */ | 227 | /* Free 2K page table fragment of a 4K page */ |
205 | bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); | 228 | bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); |
206 | spin_lock_bh(&mm->context.list_lock); | 229 | spin_lock_bh(&mm->context.pgtable_lock); |
207 | mask = atomic_xor_bits(&page->_mapcount, 1U << bit); | 230 | mask = atomic_xor_bits(&page->_mapcount, 1U << bit); |
208 | if (mask & 3) | 231 | if (mask & 3) |
209 | list_add(&page->lru, &mm->context.pgtable_list); | 232 | list_add(&page->lru, &mm->context.pgtable_list); |
210 | else | 233 | else |
211 | list_del(&page->lru); | 234 | list_del(&page->lru); |
212 | spin_unlock_bh(&mm->context.list_lock); | 235 | spin_unlock_bh(&mm->context.pgtable_lock); |
213 | if (mask != 0) | 236 | if (mask != 0) |
214 | return; | 237 | return; |
215 | } | 238 | } |
@@ -235,13 +258,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, | |||
235 | return; | 258 | return; |
236 | } | 259 | } |
237 | bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); | 260 | bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); |
238 | spin_lock_bh(&mm->context.list_lock); | 261 | spin_lock_bh(&mm->context.pgtable_lock); |
239 | mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); | 262 | mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); |
240 | if (mask & 3) | 263 | if (mask & 3) |
241 | list_add_tail(&page->lru, &mm->context.pgtable_list); | 264 | list_add_tail(&page->lru, &mm->context.pgtable_list); |
242 | else | 265 | else |
243 | list_del(&page->lru); | 266 | list_del(&page->lru); |
244 | spin_unlock_bh(&mm->context.list_lock); | 267 | spin_unlock_bh(&mm->context.pgtable_lock); |
245 | table = (unsigned long *) (__pa(table) | (1U << bit)); | 268 | table = (unsigned long *) (__pa(table) | (1U << bit)); |
246 | tlb_remove_table(tlb, table); | 269 | tlb_remove_table(tlb, table); |
247 | } | 270 | } |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b98d1a152d46..5f092015aaa7 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -174,14 +174,17 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) | |||
174 | return pgste; | 174 | return pgste; |
175 | } | 175 | } |
176 | 176 | ||
177 | static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, | 177 | static inline pgste_t pgste_pte_notify(struct mm_struct *mm, |
178 | unsigned long addr, | 178 | unsigned long addr, |
179 | pte_t *ptep, pgste_t pgste) | 179 | pte_t *ptep, pgste_t pgste) |
180 | { | 180 | { |
181 | #ifdef CONFIG_PGSTE | 181 | #ifdef CONFIG_PGSTE |
182 | if (pgste_val(pgste) & PGSTE_IN_BIT) { | 182 | unsigned long bits; |
183 | pgste_val(pgste) &= ~PGSTE_IN_BIT; | 183 | |
184 | ptep_notify(mm, addr, ptep); | 184 | bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); |
185 | if (bits) { | ||
186 | pgste_val(pgste) ^= bits; | ||
187 | ptep_notify(mm, addr, ptep, bits); | ||
185 | } | 188 | } |
186 | #endif | 189 | #endif |
187 | return pgste; | 190 | return pgste; |
@@ -194,7 +197,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm, | |||
194 | 197 | ||
195 | if (mm_has_pgste(mm)) { | 198 | if (mm_has_pgste(mm)) { |
196 | pgste = pgste_get_lock(ptep); | 199 | pgste = pgste_get_lock(ptep); |
197 | pgste = pgste_ipte_notify(mm, addr, ptep, pgste); | 200 | pgste = pgste_pte_notify(mm, addr, ptep, pgste); |
198 | } | 201 | } |
199 | return pgste; | 202 | return pgste; |
200 | } | 203 | } |
@@ -459,6 +462,90 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
459 | preempt_enable(); | 462 | preempt_enable(); |
460 | } | 463 | } |
461 | 464 | ||
465 | /** | ||
466 | * ptep_force_prot - change access rights of a locked pte | ||
467 | * @mm: pointer to the process mm_struct | ||
468 | * @addr: virtual address in the guest address space | ||
469 | * @ptep: pointer to the page table entry | ||
470 | * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE | ||
471 | * @bit: pgste bit to set (e.g. for notification) | ||
472 | * | ||
473 | * Returns 0 if the access rights were changed and -EAGAIN if the current | ||
474 | * and requested access rights are incompatible. | ||
475 | */ | ||
476 | int ptep_force_prot(struct mm_struct *mm, unsigned long addr, | ||
477 | pte_t *ptep, int prot, unsigned long bit) | ||
478 | { | ||
479 | pte_t entry; | ||
480 | pgste_t pgste; | ||
481 | int pte_i, pte_p; | ||
482 | |||
483 | pgste = pgste_get_lock(ptep); | ||
484 | entry = *ptep; | ||
485 | /* Check pte entry after all locks have been acquired */ | ||
486 | pte_i = pte_val(entry) & _PAGE_INVALID; | ||
487 | pte_p = pte_val(entry) & _PAGE_PROTECT; | ||
488 | if ((pte_i && (prot != PROT_NONE)) || | ||
489 | (pte_p && (prot & PROT_WRITE))) { | ||
490 | pgste_set_unlock(ptep, pgste); | ||
491 | return -EAGAIN; | ||
492 | } | ||
493 | /* Change access rights and set pgste bit */ | ||
494 | if (prot == PROT_NONE && !pte_i) { | ||
495 | ptep_flush_direct(mm, addr, ptep); | ||
496 | pgste = pgste_update_all(entry, pgste, mm); | ||
497 | pte_val(entry) |= _PAGE_INVALID; | ||
498 | } | ||
499 | if (prot == PROT_READ && !pte_p) { | ||
500 | ptep_flush_direct(mm, addr, ptep); | ||
501 | pte_val(entry) &= ~_PAGE_INVALID; | ||
502 | pte_val(entry) |= _PAGE_PROTECT; | ||
503 | } | ||
504 | pgste_val(pgste) |= bit; | ||
505 | pgste = pgste_set_pte(ptep, pgste, entry); | ||
506 | pgste_set_unlock(ptep, pgste); | ||
507 | return 0; | ||
508 | } | ||
509 | |||
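In other words, ptep_force_prot() downgrades an already mapped PTE and tags its PGSTE so that a later invalidation is reported through ptep_notify(); if the PTE is not mapped (or not writable when write access is required), it bails out with -EAGAIN and the caller has to fault the page in first. A hedged sketch of a caller, using the generic get_locked_pte() helper rather than the gmap-internal walker:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/pgtable.h>

/*
 * Illustrative sketch only: make one page read-only and ask for an
 * invalidation notification via PGSTE_IN_BIT.  -EAGAIN means the PTE is
 * not mapped the way we need it yet; the caller would fault it in
 * (e.g. with fixup_user_fault()) and retry.
 */
static int protect_one_page(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (!ptep)
		return -EFAULT;
	rc = ptep_force_prot(mm, addr, ptep, PROT_READ, PGSTE_IN_BIT);
	pte_unmap_unlock(ptep, ptl);
	return rc;
}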
510 | int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, | ||
511 | pte_t *sptep, pte_t *tptep, pte_t pte) | ||
512 | { | ||
513 | pgste_t spgste, tpgste; | ||
514 | pte_t spte, tpte; | ||
515 | int rc = -EAGAIN; | ||
516 | |||
517 | if (!(pte_val(*tptep) & _PAGE_INVALID)) | ||
518 | return 0; /* already shadowed */ | ||
519 | spgste = pgste_get_lock(sptep); | ||
520 | spte = *sptep; | ||
521 | if (!(pte_val(spte) & _PAGE_INVALID) && | ||
522 | !((pte_val(spte) & _PAGE_PROTECT) && | ||
523 | !(pte_val(pte) & _PAGE_PROTECT))) { | ||
524 | pgste_val(spgste) |= PGSTE_VSIE_BIT; | ||
525 | tpgste = pgste_get_lock(tptep); | ||
526 | pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | | ||
527 | (pte_val(pte) & _PAGE_PROTECT); | ||
528 | /* don't touch the storage key - it belongs to parent pgste */ | ||
529 | tpgste = pgste_set_pte(tptep, tpgste, tpte); | ||
530 | pgste_set_unlock(tptep, tpgste); | ||
531 | rc = 1; | ||
532 | } | ||
533 | pgste_set_unlock(sptep, spgste); | ||
534 | return rc; | ||
535 | } | ||
536 | |||
537 | void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) | ||
538 | { | ||
539 | pgste_t pgste; | ||
540 | |||
541 | pgste = pgste_get_lock(ptep); | ||
542 | /* notifier is called by the caller */ | ||
543 | ptep_flush_direct(mm, saddr, ptep); | ||
544 | /* don't touch the storage key - it belongs to parent pgste */ | ||
545 | pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); | ||
546 | pgste_set_unlock(ptep, pgste); | ||
547 | } | ||
548 | |||
462 | static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) | 549 | static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) |
463 | { | 550 | { |
464 | if (!non_swap_entry(entry)) | 551 | if (!non_swap_entry(entry)) |
@@ -532,7 +619,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) | |||
532 | pgste_val(pgste) &= ~PGSTE_UC_BIT; | 619 | pgste_val(pgste) &= ~PGSTE_UC_BIT; |
533 | pte = *ptep; | 620 | pte = *ptep; |
534 | if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { | 621 | if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { |
535 | pgste = pgste_ipte_notify(mm, addr, ptep, pgste); | 622 | pgste = pgste_pte_notify(mm, addr, ptep, pgste); |
536 | __ptep_ipte(addr, ptep); | 623 | __ptep_ipte(addr, ptep); |
537 | if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) | 624 | if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) |
538 | pte_val(pte) |= _PAGE_PROTECT; | 625 | pte_val(pte) |= _PAGE_PROTECT; |
@@ -555,12 +642,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, | |||
555 | pgste_t old, new; | 642 | pgste_t old, new; |
556 | pte_t *ptep; | 643 | pte_t *ptep; |
557 | 644 | ||
558 | down_read(&mm->mmap_sem); | ||
559 | ptep = get_locked_pte(mm, addr, &ptl); | 645 | ptep = get_locked_pte(mm, addr, &ptl); |
560 | if (unlikely(!ptep)) { | 646 | if (unlikely(!ptep)) |
561 | up_read(&mm->mmap_sem); | ||
562 | return -EFAULT; | 647 | return -EFAULT; |
563 | } | ||
564 | 648 | ||
565 | new = old = pgste_get_lock(ptep); | 649 | new = old = pgste_get_lock(ptep); |
566 | pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | | 650 | pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | |
@@ -587,45 +671,100 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, | |||
587 | 671 | ||
588 | pgste_set_unlock(ptep, new); | 672 | pgste_set_unlock(ptep, new); |
589 | pte_unmap_unlock(ptep, ptl); | 673 | pte_unmap_unlock(ptep, ptl); |
590 | up_read(&mm->mmap_sem); | ||
591 | return 0; | 674 | return 0; |
592 | } | 675 | } |
593 | EXPORT_SYMBOL(set_guest_storage_key); | 676 | EXPORT_SYMBOL(set_guest_storage_key); |
594 | 677 | ||
595 | unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) | 678 | /** |
679 | * Conditionally set a guest storage key (handling csske). | ||
680 | * oldkey will be updated when either mr or mc is set and a pointer is given. | ||
681 | * | ||
682 | * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest | ||
683 | * storage key was updated and -EFAULT on access errors. | ||
684 | */ | ||
685 | int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, | ||
686 | unsigned char key, unsigned char *oldkey, | ||
687 | bool nq, bool mr, bool mc) | ||
688 | { | ||
689 | unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; | ||
690 | int rc; | ||
691 | |||
692 | /* we can drop the pgste lock between getting and setting the key */ | ||
693 | if (mr | mc) { | ||
694 | rc = get_guest_storage_key(current->mm, addr, &tmp); | ||
695 | if (rc) | ||
696 | return rc; | ||
697 | if (oldkey) | ||
698 | *oldkey = tmp; | ||
699 | if (!mr) | ||
700 | mask |= _PAGE_REFERENCED; | ||
701 | if (!mc) | ||
702 | mask |= _PAGE_CHANGED; | ||
703 | if (!((tmp ^ key) & mask)) | ||
704 | return 0; | ||
705 | } | ||
706 | rc = set_guest_storage_key(current->mm, addr, key, nq); | ||
707 | return rc < 0 ? rc : 1; | ||
708 | } | ||
709 | EXPORT_SYMBOL(cond_set_guest_storage_key); | ||
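Note that the key is read and written through current->mm, so the call is meant to come from the task that owns the guest mapping. A hedged sketch of an SSKE-style caller follows; the translation step via gmap_translate() and the helper name are assumptions about the calling context, not part of this patch.

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/gmap.h>
#include <asm/pgtable.h>

/*
 * Illustrative sketch only: conditionally set the storage key for one
 * guest page, SSKE style.  @gaddr is a guest-absolute address; the old
 * key is returned for condition-code handling when MR/MC are used.
 */
static int sske_one_page(struct gmap *gmap, unsigned long gaddr,
			 unsigned char key, unsigned char *oldkey,
			 bool nq, bool mr, bool mc)
{
	unsigned long vmaddr;
	int rc;

	vmaddr = gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr))
		return -EFAULT;
	down_read(&current->mm->mmap_sem);
	rc = cond_set_guest_storage_key(current->mm, vmaddr, key, oldkey,
					nq, mr, mc);
	up_read(&current->mm->mmap_sem);
	return rc;	/* 0: unchanged, 1: key updated, <0: error */
}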
710 | |||
711 | /** | ||
712 | * Reset a guest reference bit (rrbe), returning the reference and changed bit. | ||
713 | * | ||
714 | * Returns < 0 in case of error, otherwise the cc to be reported to the guest. | ||
715 | */ | ||
716 | int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) | ||
596 | { | 717 | { |
597 | unsigned char key; | ||
598 | spinlock_t *ptl; | 718 | spinlock_t *ptl; |
599 | pgste_t pgste; | 719 | pgste_t old, new; |
600 | pte_t *ptep; | 720 | pte_t *ptep; |
721 | int cc = 0; | ||
601 | 722 | ||
602 | down_read(&mm->mmap_sem); | ||
603 | ptep = get_locked_pte(mm, addr, &ptl); | 723 | ptep = get_locked_pte(mm, addr, &ptl); |
604 | if (unlikely(!ptep)) { | 724 | if (unlikely(!ptep)) |
605 | up_read(&mm->mmap_sem); | ||
606 | return -EFAULT; | 725 | return -EFAULT; |
607 | } | ||
608 | pgste = pgste_get_lock(ptep); | ||
609 | 726 | ||
610 | if (pte_val(*ptep) & _PAGE_INVALID) { | 727 | new = old = pgste_get_lock(ptep); |
611 | key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; | 728 | /* Reset guest reference bit only */ |
612 | key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; | 729 | pgste_val(new) &= ~PGSTE_GR_BIT; |
613 | key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; | ||
614 | key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; | ||
615 | } else { | ||
616 | key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); | ||
617 | 730 | ||
618 | /* Reflect guest's logical view, not physical */ | 731 | if (!(pte_val(*ptep) & _PAGE_INVALID)) { |
619 | if (pgste_val(pgste) & PGSTE_GR_BIT) | 732 | cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); |
620 | key |= _PAGE_REFERENCED; | 733 | /* Merge real referenced bit into host-set */ |
621 | if (pgste_val(pgste) & PGSTE_GC_BIT) | 734 | pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; |
622 | key |= _PAGE_CHANGED; | ||
623 | } | 735 | } |
736 | /* Reflect guest's logical view, not physical */ | ||
737 | cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; | ||
738 | /* Changing the guest storage key is considered a change of the page */ | ||
739 | if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) | ||
740 | pgste_val(new) |= PGSTE_UC_BIT; | ||
741 | |||
742 | pgste_set_unlock(ptep, new); | ||
743 | pte_unmap_unlock(ptep, ptl); | ||
744 | return cc; | ||
745 | } | ||
746 | EXPORT_SYMBOL(reset_guest_reference_bit); | ||
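Since the function reports the reference/changed state as a condition code, a hedged sketch of an RRBE-style caller looks like this; the helper name and the assumption that @vmaddr was already translated from the guest address are illustrative.

#include <linux/errno.h>
#include <linux/mm.h>
#include <asm/pgtable.h>

/*
 * Illustrative sketch only: reset the guest reference bit for one page
 * and return the condition code (0-3) that RRBE would present to the
 * guest, or a negative error.
 */
static int rrbe_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	int cc;

	down_read(&mm->mmap_sem);
	cc = reset_guest_reference_bit(mm, vmaddr);
	up_read(&mm->mmap_sem);
	return cc;
}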
747 | |||
748 | int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, | ||
749 | unsigned char *key) | ||
750 | { | ||
751 | spinlock_t *ptl; | ||
752 | pgste_t pgste; | ||
753 | pte_t *ptep; | ||
624 | 754 | ||
755 | ptep = get_locked_pte(mm, addr, &ptl); | ||
756 | if (unlikely(!ptep)) | ||
757 | return -EFAULT; | ||
758 | |||
759 | pgste = pgste_get_lock(ptep); | ||
760 | *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; | ||
761 | if (!(pte_val(*ptep) & _PAGE_INVALID)) | ||
762 | *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); | ||
763 | /* Reflect guest's logical view, not physical */ | ||
764 | *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; | ||
625 | pgste_set_unlock(ptep, pgste); | 765 | pgste_set_unlock(ptep, pgste); |
626 | pte_unmap_unlock(ptep, ptl); | 766 | pte_unmap_unlock(ptep, ptl); |
627 | up_read(&mm->mmap_sem); | 767 | return 0; |
628 | return key; | ||
629 | } | 768 | } |
630 | EXPORT_SYMBOL(get_guest_storage_key); | 769 | EXPORT_SYMBOL(get_guest_storage_key); |
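With the new calling convention the storage-key accessors no longer take mmap_sem themselves; the caller supplies a host virtual address and holds the semaphore. A hedged ISKE-style sketch (the helper name and the assumption that @vmaddr was already produced by a gmap translation are illustrative):

#include <linux/errno.h>
#include <linux/mm.h>
#include <asm/pgtable.h>

/*
 * Illustrative sketch only: read the key the guest sees for one page,
 * i.e. the hardware key merged with the GR/GC software bits kept in the
 * PGSTE.  @vmaddr is a host virtual address inside mm.
 */
static int iske_one_page(struct mm_struct *mm, unsigned long vmaddr,
			 unsigned char *key)
{
	int rc;

	down_read(&mm->mmap_sem);
	rc = get_guest_storage_key(mm, vmaddr, key);
	up_read(&mm->mmap_sem);
	return rc;	/* 0 on success, -EFAULT if no PTE could be found */
}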
631 | #endif | 770 | #endif |