diff options
author | Ohad Ben-Cohen <ohad@wizery.com> | 2011-11-10 04:32:26 -0500 |
---|---|---|
committer | Joerg Roedel <joerg.roedel@amd.com> | 2011-11-10 05:40:37 -0500 |
commit | 7d3002cc8c160dbda0e6ab9cd66dc6eb401b8b70 (patch) | |
tree | 453bad0319d12fc0a3fe6594b8e212615e7e2a70 /drivers/iommu | |
parent | 5009065d38c95455bd2d27c2838313e3dd0c5bc7 (diff) |
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it into pages of the sizes supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with Intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
bus_set_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adapted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/iommu.c | 131 | ||||
-rw-r--r-- | drivers/iommu/omap-iovmm.c | 17 |
2 files changed, 123 insertions, 25 deletions
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7a2953d8f12e..b278458d5816 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c | |||
@@ -16,6 +16,8 @@ | |||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #define pr_fmt(fmt) "%s: " fmt, __func__ | ||
20 | |||
19 | #include <linux/device.h> | 21 | #include <linux/device.h> |
20 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
21 | #include <linux/bug.h> | 23 | #include <linux/bug.h> |
@@ -47,6 +49,16 @@ int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops) | |||
47 | if (bus->iommu_ops != NULL) | 49 | if (bus->iommu_ops != NULL) |
48 | return -EBUSY; | 50 | return -EBUSY; |
49 | 51 | ||
52 | /* | ||
53 | * Set the default pgsize values, which retain the existing | ||
54 | * IOMMU API behavior: drivers will be called to map | ||
55 | * regions that are sized/aligned to order of 4KiB pages. | ||
56 | * | ||
57 | * This will be removed once all drivers are migrated. | ||
58 | */ | ||
59 | if (!ops->pgsize_bitmap) | ||
60 | ops->pgsize_bitmap = ~0xFFFUL; | ||
61 | |||
50 | bus->iommu_ops = ops; | 62 | bus->iommu_ops = ops; |
51 | 63 | ||
52 | /* Do IOMMU specific setup for this bus-type */ | 64 | /* Do IOMMU specific setup for this bus-type */ |
@@ -157,34 +169,125 @@ int iommu_domain_has_cap(struct iommu_domain *domain, | |||
157 | EXPORT_SYMBOL_GPL(iommu_domain_has_cap); | 169 | EXPORT_SYMBOL_GPL(iommu_domain_has_cap); |
158 | 170 | ||
159 | int iommu_map(struct iommu_domain *domain, unsigned long iova, | 171 | int iommu_map(struct iommu_domain *domain, unsigned long iova, |
160 | phys_addr_t paddr, int gfp_order, int prot) | 172 | phys_addr_t paddr, size_t size, int prot) |
161 | { | 173 | { |
162 | size_t size; | 174 | unsigned long orig_iova = iova; |
175 | unsigned int min_pagesz; | ||
176 | size_t orig_size = size; | ||
177 | int ret = 0; | ||
163 | 178 | ||
164 | if (unlikely(domain->ops->map == NULL)) | 179 | if (unlikely(domain->ops->map == NULL)) |
165 | return -ENODEV; | 180 | return -ENODEV; |
166 | 181 | ||
167 | size = PAGE_SIZE << gfp_order; | 182 | /* find out the minimum page size supported */ |
183 | min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); | ||
184 | |||
185 | /* | ||
186 | * both the virtual address and the physical one, as well as | ||
187 | * the size of the mapping, must be aligned (at least) to the | ||
188 | * size of the smallest page supported by the hardware | ||
189 | */ | ||
190 | if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { | ||
191 | pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz " | ||
192 | "0x%x\n", iova, (unsigned long)paddr, | ||
193 | (unsigned long)size, min_pagesz); | ||
194 | return -EINVAL; | ||
195 | } | ||
196 | |||
197 | pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova, | ||
198 | (unsigned long)paddr, (unsigned long)size); | ||
199 | |||
200 | while (size) { | ||
201 | unsigned long pgsize, addr_merge = iova | paddr; | ||
202 | unsigned int pgsize_idx; | ||
203 | |||
204 | /* Max page size that still fits into 'size' */ | ||
205 | pgsize_idx = __fls(size); | ||
206 | |||
207 | /* need to consider alignment requirements ? */ | ||
208 | if (likely(addr_merge)) { | ||
209 | /* Max page size allowed by both iova and paddr */ | ||
210 | unsigned int align_pgsize_idx = __ffs(addr_merge); | ||
211 | |||
212 | pgsize_idx = min(pgsize_idx, align_pgsize_idx); | ||
213 | } | ||
214 | |||
215 | /* build a mask of acceptable page sizes */ | ||
216 | pgsize = (1UL << (pgsize_idx + 1)) - 1; | ||
217 | |||
218 | /* throw away page sizes not supported by the hardware */ | ||
219 | pgsize &= domain->ops->pgsize_bitmap; | ||
168 | 220 | ||
169 | BUG_ON(!IS_ALIGNED(iova | paddr, size)); | 221 | /* make sure we're still sane */ |
222 | BUG_ON(!pgsize); | ||
170 | 223 | ||
171 | return domain->ops->map(domain, iova, paddr, size, prot); | 224 | /* pick the biggest page */ |
225 | pgsize_idx = __fls(pgsize); | ||
226 | pgsize = 1UL << pgsize_idx; | ||
227 | |||
228 | pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova, | ||
229 | (unsigned long)paddr, pgsize); | ||
230 | |||
231 | ret = domain->ops->map(domain, iova, paddr, pgsize, prot); | ||
232 | if (ret) | ||
233 | break; | ||
234 | |||
235 | iova += pgsize; | ||
236 | paddr += pgsize; | ||
237 | size -= pgsize; | ||
238 | } | ||
239 | |||
240 | /* unroll mapping in case something went wrong */ | ||
241 | if (ret) | ||
242 | iommu_unmap(domain, orig_iova, orig_size - size); | ||
243 | |||
244 | return ret; | ||
172 | } | 245 | } |
173 | EXPORT_SYMBOL_GPL(iommu_map); | 246 | EXPORT_SYMBOL_GPL(iommu_map); |
174 | 247 | ||
175 | int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) | 248 | size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) |
176 | { | 249 | { |
177 | size_t size, unmapped; | 250 | size_t unmapped_page, unmapped = 0; |
251 | unsigned int min_pagesz; | ||
178 | 252 | ||
179 | if (unlikely(domain->ops->unmap == NULL)) | 253 | if (unlikely(domain->ops->unmap == NULL)) |
180 | return -ENODEV; | 254 | return -ENODEV; |
181 | 255 | ||
182 | size = PAGE_SIZE << gfp_order; | 256 | /* find out the minimum page size supported */ |
183 | 257 | min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); | |
184 | BUG_ON(!IS_ALIGNED(iova, size)); | 258 | |
185 | 259 | /* | |
186 | unmapped = domain->ops->unmap(domain, iova, size); | 260 | * The virtual address, as well as the size of the mapping, must be |
187 | 261 | * aligned (at least) to the size of the smallest page supported | |
188 | return get_order(unmapped); | 262 | * by the hardware |
263 | */ | ||
264 | if (!IS_ALIGNED(iova | size, min_pagesz)) { | ||
265 | pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n", | ||
266 | iova, (unsigned long)size, min_pagesz); | ||
267 | return -EINVAL; | ||
268 | } | ||
269 | |||
270 | pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova, | ||
271 | (unsigned long)size); | ||
272 | |||
273 | /* | ||
274 | * Keep iterating until we either unmap 'size' bytes (or more) | ||
275 | * or we hit an area that isn't mapped. | ||
276 | */ | ||
277 | while (unmapped < size) { | ||
278 | size_t left = size - unmapped; | ||
279 | |||
280 | unmapped_page = domain->ops->unmap(domain, iova, left); | ||
281 | if (!unmapped_page) | ||
282 | break; | ||
283 | |||
284 | pr_debug("unmapped: iova 0x%lx size %lx\n", iova, | ||
285 | (unsigned long)unmapped_page); | ||
286 | |||
287 | iova += unmapped_page; | ||
288 | unmapped += unmapped_page; | ||
289 | } | ||
290 | |||
291 | return unmapped; | ||
189 | } | 292 | } |
190 | EXPORT_SYMBOL_GPL(iommu_unmap); | 293 | EXPORT_SYMBOL_GPL(iommu_unmap); |
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c index e8fdb8830f69..0b7b14cb030b 100644 --- a/drivers/iommu/omap-iovmm.c +++ b/drivers/iommu/omap-iovmm.c | |||
@@ -409,7 +409,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, | |||
409 | unsigned int i, j; | 409 | unsigned int i, j; |
410 | struct scatterlist *sg; | 410 | struct scatterlist *sg; |
411 | u32 da = new->da_start; | 411 | u32 da = new->da_start; |
412 | int order; | ||
413 | 412 | ||
414 | if (!domain || !sgt) | 413 | if (!domain || !sgt) |
415 | return -EINVAL; | 414 | return -EINVAL; |
@@ -428,12 +427,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, | |||
428 | if (bytes_to_iopgsz(bytes) < 0) | 427 | if (bytes_to_iopgsz(bytes) < 0) |
429 | goto err_out; | 428 | goto err_out; |
430 | 429 | ||
431 | order = get_order(bytes); | ||
432 | |||
433 | pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, | 430 | pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, |
434 | i, da, pa, bytes); | 431 | i, da, pa, bytes); |
435 | 432 | ||
436 | err = iommu_map(domain, da, pa, order, flags); | 433 | err = iommu_map(domain, da, pa, bytes, flags); |
437 | if (err) | 434 | if (err) |
438 | goto err_out; | 435 | goto err_out; |
439 | 436 | ||
@@ -448,10 +445,9 @@ err_out: | |||
448 | size_t bytes; | 445 | size_t bytes; |
449 | 446 | ||
450 | bytes = sg->length + sg->offset; | 447 | bytes = sg->length + sg->offset; |
451 | order = get_order(bytes); | ||
452 | 448 | ||
453 | /* ignore failures.. we're already handling one */ | 449 | /* ignore failures.. we're already handling one */ |
454 | iommu_unmap(domain, da, order); | 450 | iommu_unmap(domain, da, bytes); |
455 | 451 | ||
456 | da += bytes; | 452 | da += bytes; |
457 | } | 453 | } |
@@ -466,7 +462,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, | |||
466 | size_t total = area->da_end - area->da_start; | 462 | size_t total = area->da_end - area->da_start; |
467 | const struct sg_table *sgt = area->sgt; | 463 | const struct sg_table *sgt = area->sgt; |
468 | struct scatterlist *sg; | 464 | struct scatterlist *sg; |
469 | int i, err; | 465 | int i; |
466 | size_t unmapped; | ||
470 | 467 | ||
471 | BUG_ON(!sgtable_ok(sgt)); | 468 | BUG_ON(!sgtable_ok(sgt)); |
472 | BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); | 469 | BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); |
@@ -474,13 +471,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, | |||
474 | start = area->da_start; | 471 | start = area->da_start; |
475 | for_each_sg(sgt->sgl, sg, sgt->nents, i) { | 472 | for_each_sg(sgt->sgl, sg, sgt->nents, i) { |
476 | size_t bytes; | 473 | size_t bytes; |
477 | int order; | ||
478 | 474 | ||
479 | bytes = sg->length + sg->offset; | 475 | bytes = sg->length + sg->offset; |
480 | order = get_order(bytes); | ||
481 | 476 | ||
482 | err = iommu_unmap(domain, start, order); | 477 | unmapped = iommu_unmap(domain, start, bytes); |
483 | if (err < 0) | 478 | if (unmapped < bytes) |
484 | break; | 479 | break; |
485 | 480 | ||
486 | dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", | 481 | dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", |