author     Alexey Kardashevskiy <aik@ozlabs.ru>     2018-07-17 03:19:13 -0400
committer  Michael Ellerman <mpe@ellerman.id.au>    2018-07-18 02:17:17 -0400
commit     76fa4975f3ed12d15762bc979ca44078598ed8ee (patch)
tree       e2b533446a8517d4bd3b080d85d0b6e9373effbc /arch/powerpc
parent     1463edca6734d42ab4406fa2896e20b45478ea36 (diff)
KVM: PPC: Check if IOMMU page is contained in the pinned physical page
A VM which has:

 - a DMA capable device passed through to it (eg. network card);
 - a malicious kernel running in it that ignores H_PUT_TCE failure;
 - the capability of using IOMMU pages bigger than physical pages

can create an IOMMU mapping that exposes (for example) 16MB of the host
physical memory to the device when only 64K was allocated to the VM. The
remaining 16MB - 64K will be some other content of host memory, possibly
including pages of the VM, but also pages of host kernel memory, host
programs or other VMs. The attacking VM does not control the location of
the page it can map, and is only allowed to map as many pages as it has
pages of RAM.

We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that an
IOMMU page is contained in the physical page so the PCI hardware won't
get access to unassigned host memory; however this check is missing in
the KVM fastpath (the H_PUT_TCE accelerated code). We have been lucky so
far and have not hit this yet, because the very first time the mapping
happens we do not have tbl::it_userspace allocated and fall back to
userspace, which in turn calls the VFIO IOMMU driver; that fails and the
guest does not retry.

This stores the smallest preregistered page size in the preregistered
region descriptor and changes the mm_iommu_xxx API to check it against
the IOMMU page size.

The maximum page size is calculated as the minimum of the natural region
alignment and the compound page size. For the page shift this uses the
shift returned by find_linux_pte(), which indicates how the page is
mapped in the current userspace: if the page is huge and the shift is
non-zero, then it is a leaf pte and the page is mapped within the range.

Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
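As a rough illustration of the "natural region alignment" starting point
described above, the stand-alone sketch below models the calculation in
plain C. It is not part of the patch: PAGE_SHIFT is assumed to be 16
(64K base pages, as on the affected POWER hosts) and
region_max_pageshift() is a made-up helper playing the role of the
mem->pageshift initialisation.

#include <stdio.h>

#define PAGE_SHIFT      16      /* assumption: 64K base pages */

/*
 * The lowest set bit of (ua | size-in-bytes) is the natural alignment of
 * the preregistered region, i.e. the largest IOMMU page shift that can
 * still be fully contained in it.  __builtin_ctzl() plays the role of
 * the kernel's __ffs() here.
 */
static unsigned int region_max_pageshift(unsigned long ua, unsigned long entries)
{
        return __builtin_ctzl(ua | (entries << PAGE_SHIFT));
}

int main(void)
{
        /* One 64K page pinned at a 64K-aligned address: shift 16, so a
         * 16MB (shift 24) IOMMU page must be refused. */
        printf("%u\n", region_max_pageshift(0x10000, 1));       /* 16 */

        /* 256 x 64K pages (16MB) at a 16MB-aligned address: shift 24 is OK. */
        printf("%u\n", region_max_pageshift(0x1000000, 256));   /* 24 */
        return 0;
}

The per-page find_linux_pte() step in the patch can only lower this
value further (mem->pageshift = min(mem->pageshift, pageshift)), never
raise it.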
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h  |  4
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c        |  2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c     |  6
-rw-r--r--  arch/powerpc/mm/mmu_context_iommu.c     | 37
4 files changed, 42 insertions, 7 deletions
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 896efa559996..79d570cbf332 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
                unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-               unsigned long ua, unsigned long *hpa);
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-               unsigned long ua, unsigned long *hpa);
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index d066e37551ec..8c456fa691a5 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
                /* This only handles v2 IOMMU type, v1 is handled via ioctl() */
                return H_TOO_HARD;
 
-       if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+       if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
                return H_HARDWARE;
 
        if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 925fc316a104..5b298f5a1a14 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -279,7 +279,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
        if (!mem)
                return H_TOO_HARD;
 
-       if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+       if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
+                       &hpa)))
                return H_HARDWARE;
 
        pua = (void *) vmalloc_to_phys(pua);
@@ -469,7 +470,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 
                mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
                if (mem)
-                       prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+                       prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
+                                       IOMMU_PAGE_SHIFT_4K, &tces) == 0;
        }
 
        if (!prereg) {
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index abb43646927a..a4ca57612558 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
 
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
        struct rcu_head rcu;
        unsigned long used;
        atomic64_t mapped;
+       unsigned int pageshift;
        u64 ua;                 /* userspace address */
        u64 entries;            /* number of entries in hpas[] */
        u64 *hpas;              /* vmalloc'ed */
@@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 {
        struct mm_iommu_table_group_mem_t *mem;
        long i, j, ret = 0, locked_entries = 0;
+       unsigned int pageshift;
+       unsigned long flags;
        struct page *page = NULL;
 
        mutex_lock(&mem_list_mutex);
@@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
                goto unlock_exit;
        }
 
+       /*
+        * For a starting point for a maximum page size calculation
+        * we use @ua and @entries natural alignment to allow IOMMU pages
+        * smaller than huge pages but still bigger than PAGE_SIZE.
+        */
+       mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
        mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
        if (!mem->hpas) {
                kfree(mem);
@@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
                }
        }
 populate:
+               pageshift = PAGE_SHIFT;
+               if (PageCompound(page)) {
+                       pte_t *pte;
+                       struct page *head = compound_head(page);
+                       unsigned int compshift = compound_order(head);
+
+                       local_irq_save(flags); /* disables as well */
+                       pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+                       local_irq_restore(flags);
+
+                       /* Double check it is still the same pinned page */
+                       if (pte && pte_page(*pte) == head &&
+                                       pageshift == compshift)
+                               pageshift = max_t(unsigned int, pageshift,
+                                               PAGE_SHIFT);
+               }
+               mem->pageshift = min(mem->pageshift, pageshift);
                mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
        }
 
@@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 EXPORT_SYMBOL_GPL(mm_iommu_find);
 
 long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-               unsigned long ua, unsigned long *hpa)
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
        const long entry = (ua - mem->ua) >> PAGE_SHIFT;
        u64 *va = &mem->hpas[entry];
@@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
        if (entry >= mem->entries)
                return -EFAULT;
 
+       if (pageshift > mem->pageshift)
+               return -EFAULT;
+
        *hpa = *va | (ua & ~PAGE_MASK);
 
        return 0;
@@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
 
 long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-               unsigned long ua, unsigned long *hpa)
+               unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
        const long entry = (ua - mem->ua) >> PAGE_SHIFT;
        void *va = &mem->hpas[entry];
@@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
        if (entry >= mem->entries)
                return -EFAULT;
 
+       if (pageshift > mem->pageshift)
+               return -EFAULT;
+
        pa = (void *) vmalloc_to_phys(va);
        if (!pa)
                return -EFAULT;
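For reference, the containment check itself reduces to a single
comparison. The hedged sketch below is a simplified stand-alone model,
not the kernel structures: iommu_page_contained() is a made-up helper
standing in for the test added to mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm(), where the KVM callers pass tbl->it_page_shift
and report the resulting -EFAULT to the guest as H_HARDWARE.

#include <errno.h>

/*
 * Refuse the translation when the requested IOMMU page
 * (2^iommu_pageshift bytes) is larger than the smallest host page
 * backing the preregistered region (mem_pageshift).
 */
long iommu_page_contained(unsigned int iommu_pageshift,
                          unsigned int mem_pageshift)
{
        return (iommu_pageshift > mem_pageshift) ? -EFAULT : 0;
}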