author     Balbir Singh <bsingharora@gmail.com>        2016-09-06 02:27:31 -0400
committer  Michael Ellerman <mpe@ellerman.id.au>       2016-09-29 01:14:44 -0400
commit     2e5bbb5461f138cac631fe21b4ad956feabfba22
tree       eb89de095b80a8f419022bb05ec40cf16a6cf3a7 /arch
parent     360aebd85a4c946764f6301d68de2a817fad5159
KVM: PPC: Book3S HV: Migrate pinned pages out of CMA
When PCI device pass-through is enabled via VFIO, KVM-PPC will pin pages
using get_user_pages_fast(). One of the downsides of the pinning is that
the page could be in a CMA region. The CMA region is used for other
allocations like the hash page table. Ideally we want the pinned pages to
come from a non-CMA region.

This patch (currently only for KVM PPC with VFIO) forcefully migrates the
pages out (huge pages are omitted for the moment). There are more
efficient ways of doing this, but that might be elaborate and might impact
a larger audience beyond just the kvm ppc implementation.

The magic is in new_iommu_non_cma_page(), which allocates the new page
from a non-CMA region.

I've tested the patches lightly at my end. The full solution requires
migration of THP pages in the CMA region. That work will be done
incrementally on top of this.

Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[mpe: Merged via powerpc tree as that's where the changes are]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
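For orientation before the diff, the approach condenses to the sketch below. This is a minimal sketch rather than the patch itself: the names sketch_new_non_cma_page() and sketch_pin_page_outside_cma() are hypothetical, error handling is elided, and it assumes the mm helpers the patch relies on (get_pageblock_migratetype(), lru_add_drain(), isolate_lru_page(), migrate_pages(), putback_movable_pages()) with their signatures from this kernel generation. Note that isolate_lru_page() is normally private to mm/, which is why the patch adds an extern declaration for it in mmu_context.h. The real implementation is in the mmu_context_iommu.c hunks below.

#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/swap.h>

extern int isolate_lru_page(struct page *page);  /* normally in mm/internal.h */

/*
 * Hypothetical sketch: allocate the replacement page outside CMA.
 * GFP_USER without __GFP_MOVABLE keeps the allocation out of CMA areas.
 */
static struct page *sketch_new_non_cma_page(struct page *page,
                                            unsigned long private,
                                            int **resultp)
{
        return alloc_page(GFP_USER | __GFP_NORETRY | __GFP_NOWARN);
}

/*
 * Hypothetical sketch: a page just pinned with get_user_pages_fast() that
 * happens to live in a CMA pageblock is isolated, migrated to a non-CMA
 * page, and then re-pinned so the long-term pin no longer blocks CMA.
 */
static void sketch_pin_page_outside_cma(unsigned long ua, struct page **page)
{
        LIST_HEAD(pages);

        if (get_pageblock_migratetype(*page) != MIGRATE_CMA)
                return;                 /* already outside CMA */

        lru_add_drain();
        if (isolate_lru_page(*page))
                return;                 /* keep the CMA page if isolation fails */

        list_add(&(*page)->lru, &pages);
        put_page(*page);                /* drop the gup reference before migrating */

        if (migrate_pages(&pages, sketch_new_non_cma_page, NULL, 0,
                          MIGRATE_SYNC, MR_CMA))
                putback_movable_pages(&pages);

        /* re-fault and re-pin; the faulted-in page now comes from outside CMA.
         * Error handling is elided in this sketch. */
        get_user_pages_fast(ua, 1 /* nr_pages */, 1 /* write */, page);
}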
Diffstat (limited to 'arch')
-rw-r--r--   arch/powerpc/include/asm/mmu_context.h |  1
-rw-r--r--   arch/powerpc/mm/mmu_context_iommu.c    | 81
2 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 9d2cd0c36ec2..475d1be39191 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -18,6 +18,7 @@ extern void destroy_context(struct mm_struct *mm);
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 struct mm_iommu_table_group_mem_t;
 
+extern int isolate_lru_page(struct page *page); /* from internal.h */
 extern bool mm_iommu_preregistered(void);
 extern long mm_iommu_get(unsigned long ua, unsigned long entries,
                 struct mm_iommu_table_group_mem_t **pmem);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a2168ae9e..e0f1c33601dd 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -15,6 +15,9 @@
 #include <linux/rculist.h>
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
 #include <asm/mmu_context.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void)
 }
 EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 
+/*
+ * Taken from alloc_migrate_target with changes to remove CMA allocations
+ */
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
+                                        int **resultp)
+{
+        gfp_t gfp_mask = GFP_USER;
+        struct page *new_page;
+
+        if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+                return NULL;
+
+        if (PageHighMem(page))
+                gfp_mask |= __GFP_HIGHMEM;
+
+        /*
+         * We don't want the allocation to force an OOM if possible
+         */
+        new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
+        return new_page;
+}
+
+static int mm_iommu_move_page_from_cma(struct page *page)
+{
+        int ret = 0;
+        LIST_HEAD(cma_migrate_pages);
+
+        /* Ignore huge pages for now */
+        if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+                return -EBUSY;
+
+        lru_add_drain();
+        ret = isolate_lru_page(page);
+        if (ret)
+                return ret;
+
+        list_add(&page->lru, &cma_migrate_pages);
+        put_page(page); /* Drop the gup reference */
+
+        ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
+                                NULL, 0, MIGRATE_SYNC, MR_CMA);
+        if (ret) {
+                if (!list_empty(&cma_migrate_pages))
+                        putback_movable_pages(&cma_migrate_pages);
+        }
+
+        return 0;
+}
+
 long mm_iommu_get(unsigned long ua, unsigned long entries,
                 struct mm_iommu_table_group_mem_t **pmem)
 {
@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
         for (i = 0; i < entries; ++i) {
                 if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
                                         1/* pages */, 1/* iswrite */, &page)) {
+                        ret = -EFAULT;
                         for (j = 0; j < i; ++j)
-                                put_page(pfn_to_page(
-                                                mem->hpas[j] >> PAGE_SHIFT));
+                                put_page(pfn_to_page(mem->hpas[j] >>
+                                                PAGE_SHIFT));
                         vfree(mem->hpas);
                         kfree(mem);
-                        ret = -EFAULT;
                         goto unlock_exit;
                 }
-
+                /*
+                 * If we get a page from the CMA zone, since we are going to
+                 * be pinning these entries, we might as well move them out
+                 * of the CMA zone if possible. NOTE: faulting in + migration
+                 * can be expensive. Batching can be considered later
+                 */
+                if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+                        if (mm_iommu_move_page_from_cma(page))
+                                goto populate;
+                        if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+                                                1/* pages */, 1/* iswrite */,
+                                                &page)) {
+                                ret = -EFAULT;
+                                for (j = 0; j < i; ++j)
+                                        put_page(pfn_to_page(mem->hpas[j] >>
+                                                                PAGE_SHIFT));
+                                vfree(mem->hpas);
+                                kfree(mem);
+                                goto unlock_exit;
+                        }
+                }
+populate:
                 mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
         }
 