author      Ohad Ben-Cohen <ohad@wizery.com>      2011-11-10 04:32:26 -0500
committer   Joerg Roedel <joerg.roedel@amd.com>   2011-11-10 05:40:37 -0500
commit      7d3002cc8c160dbda0e6ab9cd66dc6eb401b8b70 (patch)
tree        453bad0319d12fc0a3fe6594b8e212615e7e2a70
parent      5009065d38c95455bd2d27c2838313e3dd0c5bc7 (diff)
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it into page sizes supported by the iommu hardware. Always prefer bigger pages, when possible, in order to reduce TLB pressure.

The logic to do that is now added to the IOMMU core, so neither the iommu drivers themselves nor users of the IOMMU API have to duplicate it.

This allows a more lenient granularity of mappings; traditionally the IOMMU API took 'order' (of a page) as a mapping size and directly let the low-level iommu drivers handle the mapping, but now that the IOMMU core can split arbitrary memory regions into pages, we can remove this limitation, so users don't have to split those regions by themselves.

Currently the supported page sizes are advertised once and they then remain static. That works well for OMAP and MSM, but it would probably not fly well with Intel's hardware, where the page size capabilities seem to have the potential to differ between several DMA remapping devices.

register_iommu() currently sets a default pgsize behavior, so we can convert the IOMMU drivers in subsequent patches. After all the drivers are converted, the temporary default settings will be removed.

Mainline users of the IOMMU API (kvm and omap-iovmm) are adapted to deal with bytes instead of page order.

Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
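The splitting logic added below to drivers/iommu/iommu.c boils down to one step repeated per iteration: pick the largest page size that still fits the remaining length, respects the alignment of both the iova and the physical address, and is advertised in the driver's pgsize_bitmap. The following standalone C sketch mirrors that step outside the kernel; pick_pgsize(), the example bitmap and the sample addresses are illustrative only, and GCC builtins stand in for the kernel's __ffs()/__fls() helpers.

/*
 * Standalone illustration of the page-size selection performed by the
 * new iommu_map() loop. Not kernel code: pick_pgsize() and the example
 * bitmap/addresses are made up for demonstration.
 */
#include <stdio.h>
#include <stddef.h>

static unsigned long pick_pgsize(unsigned long iova, unsigned long paddr,
                                 size_t size, unsigned long pgsize_bitmap)
{
        unsigned long addr_merge = iova | paddr;
        /* index of the largest power of two that still fits into 'size' */
        unsigned int pgsize_idx = 8 * sizeof(unsigned long) - 1 -
                                  __builtin_clzl((unsigned long)size);

        /* also limited by the alignment of both iova and paddr */
        if (addr_merge) {
                unsigned int align_idx = __builtin_ctzl(addr_merge);
                if (align_idx < pgsize_idx)
                        pgsize_idx = align_idx;
        }

        /* mask of acceptable page sizes, then keep only supported ones */
        unsigned long pgsize = ((1UL << (pgsize_idx + 1)) - 1) & pgsize_bitmap;
        if (!pgsize)
                return 0;

        /* pick the biggest remaining page size */
        return 1UL << (8 * sizeof(unsigned long) - 1 - __builtin_clzl(pgsize));
}

int main(void)
{
        /* e.g. hardware that supports 4KiB, 64KiB, 1MiB and 16MiB pages */
        unsigned long bitmap = (1UL << 12) | (1UL << 16) | (1UL << 20) | (1UL << 24);
        unsigned long iova = 0x100000, paddr = 0x200000;
        size_t size = 0x110000;        /* 1MiB + 64KiB */

        while (size) {
                unsigned long pgsize = pick_pgsize(iova, paddr, size, bitmap);
                if (!pgsize)
                        break;
                printf("map 0x%lx -> 0x%lx with 0x%lx bytes\n", iova, paddr, pgsize);
                iova += pgsize;
                paddr += pgsize;
                size -= pgsize;
        }
        return 0;
}

Running it on a 1MiB-aligned region of 1MiB + 64KiB yields one 1MiB mapping followed by one 64KiB mapping, which is exactly the behavior the loop added to iommu_map() below implements.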
-rw-r--r--   drivers/iommu/iommu.c        131
-rw-r--r--   drivers/iommu/omap-iovmm.c    17
-rw-r--r--   include/linux/iommu.h         20
-rw-r--r--   virt/kvm/iommu.c               8
4 files changed, 144 insertions, 32 deletions
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7a2953d8f12e..b278458d5816 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
@@ -47,6 +49,16 @@ int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
         if (bus->iommu_ops != NULL)
                 return -EBUSY;
 
+        /*
+         * Set the default pgsize values, which retain the existing
+         * IOMMU API behavior: drivers will be called to map
+         * regions that are sized/aligned to order of 4KiB pages.
+         *
+         * This will be removed once all drivers are migrated.
+         */
+        if (!ops->pgsize_bitmap)
+                ops->pgsize_bitmap = ~0xFFFUL;
+
         bus->iommu_ops = ops;
 
         /* Do IOMMU specific setup for this bus-type */
@@ -157,34 +169,125 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
 
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
-              phys_addr_t paddr, int gfp_order, int prot)
+              phys_addr_t paddr, size_t size, int prot)
 {
-        size_t size;
+        unsigned long orig_iova = iova;
+        unsigned int min_pagesz;
+        size_t orig_size = size;
+        int ret = 0;
 
         if (unlikely(domain->ops->map == NULL))
                 return -ENODEV;
 
-        size = PAGE_SIZE << gfp_order;
+        /* find out the minimum page size supported */
+        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+        /*
+         * both the virtual address and the physical one, as well as
+         * the size of the mapping, must be aligned (at least) to the
+         * size of the smallest page supported by the hardware
+         */
+        if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+                pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
+                        "0x%x\n", iova, (unsigned long)paddr,
+                        (unsigned long)size, min_pagesz);
+                return -EINVAL;
+        }
+
+        pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
+                        (unsigned long)paddr, (unsigned long)size);
+
+        while (size) {
+                unsigned long pgsize, addr_merge = iova | paddr;
+                unsigned int pgsize_idx;
+
+                /* Max page size that still fits into 'size' */
+                pgsize_idx = __fls(size);
+
+                /* need to consider alignment requirements ? */
+                if (likely(addr_merge)) {
+                        /* Max page size allowed by both iova and paddr */
+                        unsigned int align_pgsize_idx = __ffs(addr_merge);
+
+                        pgsize_idx = min(pgsize_idx, align_pgsize_idx);
+                }
+
+                /* build a mask of acceptable page sizes */
+                pgsize = (1UL << (pgsize_idx + 1)) - 1;
+
+                /* throw away page sizes not supported by the hardware */
+                pgsize &= domain->ops->pgsize_bitmap;
 
-        BUG_ON(!IS_ALIGNED(iova | paddr, size));
+                /* make sure we're still sane */
+                BUG_ON(!pgsize);
 
-        return domain->ops->map(domain, iova, paddr, size, prot);
+                /* pick the biggest page */
+                pgsize_idx = __fls(pgsize);
+                pgsize = 1UL << pgsize_idx;
+
+                pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
+                                (unsigned long)paddr, pgsize);
+
+                ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+                if (ret)
+                        break;
+
+                iova += pgsize;
+                paddr += pgsize;
+                size -= pgsize;
+        }
+
+        /* unroll mapping in case something went wrong */
+        if (ret)
+                iommu_unmap(domain, orig_iova, orig_size - size);
+
+        return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-        size_t size, unmapped;
+        size_t unmapped_page, unmapped = 0;
+        unsigned int min_pagesz;
 
         if (unlikely(domain->ops->unmap == NULL))
                 return -ENODEV;
 
-        size = PAGE_SIZE << gfp_order;
-
-        BUG_ON(!IS_ALIGNED(iova, size));
-
-        unmapped = domain->ops->unmap(domain, iova, size);
-
-        return get_order(unmapped);
+        /* find out the minimum page size supported */
+        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+        /*
+         * The virtual address, as well as the size of the mapping, must be
+         * aligned (at least) to the size of the smallest page supported
+         * by the hardware
+         */
+        if (!IS_ALIGNED(iova | size, min_pagesz)) {
+                pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+                        iova, (unsigned long)size, min_pagesz);
+                return -EINVAL;
+        }
+
+        pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+                        (unsigned long)size);
+
+        /*
+         * Keep iterating until we either unmap 'size' bytes (or more)
+         * or we hit an area that isn't mapped.
+         */
+        while (unmapped < size) {
+                size_t left = size - unmapped;
+
+                unmapped_page = domain->ops->unmap(domain, iova, left);
+                if (!unmapped_page)
+                        break;
+
+                pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
+                                (unsigned long)unmapped_page);
+
+                iova += unmapped_page;
+                unmapped += unmapped_page;
+        }
+
+        return unmapped;
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index e8fdb8830f69..0b7b14cb030b 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -409,7 +409,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
         unsigned int i, j;
         struct scatterlist *sg;
         u32 da = new->da_start;
-        int order;
 
         if (!domain || !sgt)
                 return -EINVAL;
@@ -428,12 +427,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
                 if (bytes_to_iopgsz(bytes) < 0)
                         goto err_out;
 
-                order = get_order(bytes);
-
                 pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
                          i, da, pa, bytes);
 
-                err = iommu_map(domain, da, pa, order, flags);
+                err = iommu_map(domain, da, pa, bytes, flags);
                 if (err)
                         goto err_out;
 
@@ -448,10 +445,9 @@ err_out:
                 size_t bytes;
 
                 bytes = sg->length + sg->offset;
-                order = get_order(bytes);
 
                 /* ignore failures.. we're already handling one */
-                iommu_unmap(domain, da, order);
+                iommu_unmap(domain, da, bytes);
 
                 da += bytes;
         }
@@ -466,7 +462,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
         size_t total = area->da_end - area->da_start;
         const struct sg_table *sgt = area->sgt;
         struct scatterlist *sg;
-        int i, err;
+        int i;
+        size_t unmapped;
 
         BUG_ON(!sgtable_ok(sgt));
         BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
@@ -474,13 +471,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
         start = area->da_start;
         for_each_sg(sgt->sgl, sg, sgt->nents, i) {
                 size_t bytes;
-                int order;
 
                 bytes = sg->length + sg->offset;
-                order = get_order(bytes);
 
-                err = iommu_unmap(domain, start, order);
-                if (err < 0)
+                unmapped = iommu_unmap(domain, start, bytes);
+                if (unmapped < bytes)
                         break;
 
                 dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d5ebf3f4dd53..cc26f89c4ee6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,6 +48,19 @@ struct iommu_domain {
 
 #ifdef CONFIG_IOMMU_API
 
+/**
+ * struct iommu_ops - iommu ops and capabilities
+ * @domain_init: init iommu domain
+ * @domain_destroy: destroy iommu domain
+ * @attach_dev: attach device to an iommu domain
+ * @detach_dev: detach device from an iommu domain
+ * @map: map a physically contiguous memory region to an iommu domain
+ * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @iova_to_phys: translate iova to physical address
+ * @domain_has_cap: domain capabilities query
+ * @commit: commit iommu domain
+ * @pgsize_bitmap: bitmap of supported page sizes
+ */
 struct iommu_ops {
         int (*domain_init)(struct iommu_domain *domain);
         void (*domain_destroy)(struct iommu_domain *domain);
@@ -61,6 +74,7 @@ struct iommu_ops {
                               unsigned long iova);
         int (*domain_has_cap)(struct iommu_domain *domain,
                               unsigned long cap);
+        unsigned long pgsize_bitmap;
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -72,9 +86,9 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
                                 struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
-                     phys_addr_t paddr, int gfp_order, int prot);
-extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-                       int gfp_order);
+                     phys_addr_t paddr, size_t size, int prot);
+extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                          size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
                                       unsigned long iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
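The pgsize_bitmap field added above is how a driver advertises its supported page sizes once it is converted in the follow-up patches. A hedged sketch of such a conversion follows; the example_iommu_* identifiers and the particular page sizes are hypothetical and not taken from any in-tree driver in this patch.

#include <linux/iommu.h>

/*
 * Hypothetical driver-side use of the new pgsize_bitmap field: each set
 * bit advertises one supported page size, here 4KiB, 64KiB, 1MiB and
 * 16MiB. All identifiers prefixed with example_ are illustrative only.
 */
#define EXAMPLE_IOMMU_PGSIZES   ((1UL << 12) | (1UL << 16) | \
                                 (1UL << 20) | (1UL << 24))

static struct iommu_ops example_iommu_ops = {
        /* .domain_init, .attach_dev, .map, .unmap, ... as before (not shown) */
        .pgsize_bitmap  = EXAMPLE_IOMMU_PGSIZES,
};

/* registered once for the driver's bus type, e.g.: */
/* bus_set_iommu(&platform_bus_type, &example_iommu_ops); */

With the bitmap advertised, the core only calls the driver's ->map() with one of the advertised sizes, so the driver no longer needs its own splitting loop.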
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a195c07fa829..304d7e5717e9 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
                 /* Map into IO address space */
                 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-                              get_order(page_size), flags);
+                              page_size, flags);
                 if (r) {
                         printk(KERN_ERR "kvm_iommu_map_address:"
                                "iommu failed to map pfn=%llx\n", pfn);
@@ -292,15 +292,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
         while (gfn < end_gfn) {
                 unsigned long unmap_pages;
-                int order;
+                size_t size;
 
                 /* Get physical address */
                 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
                 pfn = phys >> PAGE_SHIFT;
 
                 /* Unmap address from IO address space */
-                order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
-                unmap_pages = 1ULL << order;
+                size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+                unmap_pages = 1ULL << get_order(size);
 
                 /* Unpin all pages we just unmapped to not leak any memory */
                 kvm_unpin_pages(kvm, pfn, unmap_pages);
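For callers, the visible change in the hunks above is that iommu_map() and iommu_unmap() now take a size in bytes, and iommu_unmap() returns the number of bytes it actually unmapped rather than a page order. A hedged caller-side sketch, with hypothetical example_* helpers:

#include <linux/kernel.h>
#include <linux/iommu.h>

/*
 * Hypothetical IOMMU API user showing the calling convention after this
 * patch: sizes are plain bytes, no more get_order() at the call sites.
 */
static int example_map_buffer(struct iommu_domain *domain, unsigned long iova,
                              phys_addr_t paddr, size_t bytes)
{
        /* old API: iommu_map(domain, iova, paddr, get_order(bytes), prot) */
        return iommu_map(domain, iova, paddr, bytes,
                         IOMMU_READ | IOMMU_WRITE);
}

static void example_unmap_buffer(struct iommu_domain *domain,
                                 unsigned long iova, size_t bytes)
{
        size_t unmapped;

        /* old API returned a page order; now it returns bytes unmapped */
        unmapped = iommu_unmap(domain, iova, bytes);
        if (unmapped < bytes)
                pr_warn("only unmapped 0x%zx of 0x%zx bytes\n",
                        unmapped, bytes);
}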