Diffstat (limited to 'arch/arm/mm/dma-mapping.c'):
 arch/arm/mm/dma-mapping.c | 577 ++++++++++++++++++----------------------------
 1 file changed, 235 insertions(+), 342 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4044abcf6f9d..4e7d1182e8a3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -22,13 +22,14 @@
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include <linux/io.h>
 #include <linux/vmalloc.h>
+#include <linux/sizes.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/sizes.h>
 #include <asm/mach/arch.h>
 #include <asm/dma-iommu.h>
 #include <asm/mach/map.h>
@@ -72,7 +73,7 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
 	     struct dma_attrs *attrs)
 {
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 }
@@ -95,7 +96,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
 		struct dma_attrs *attrs)
 {
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
 				      handle & ~PAGE_MASK, size, dir);
 }
@@ -124,6 +125,7 @@ struct dma_map_ops arm_dma_ops = {
 	.alloc = arm_dma_alloc,
 	.free = arm_dma_free,
 	.mmap = arm_dma_mmap,
+	.get_sgtable = arm_dma_get_sgtable,
 	.map_page = arm_dma_map_page,
 	.unmap_page = arm_dma_unmap_page,
 	.map_sg = arm_dma_map_sg,
@@ -217,115 +219,70 @@ static void __dma_free_buffer(struct page *page, size_t size)
 }
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#endif
 
-#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-
-/*
- * These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page);
 
-static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				  pgprot_t prot, struct page **ret_page,
+				  const void *caller);
 
-void __init init_consistent_dma_size(unsigned long size)
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+	const void *caller)
 {
-	unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
+	struct vm_struct *area;
+	unsigned long addr;
 
-	BUG_ON(consistent_pte);	/* Check we're called before DMA region init */
-	BUG_ON(base < VMALLOC_END);
+	/*
+	 * DMA allocation can be mapped to user space, so lets
+	 * set VM_USERMAP flags too.
+	 */
+	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+				  caller);
+	if (!area)
+		return NULL;
+	addr = (unsigned long)area->addr;
+	area->phys_addr = __pfn_to_phys(page_to_pfn(page));
 
-	/* Grow region to accommodate specified size */
-	if (base < consistent_base)
-		consistent_base = base;
+	if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+	return (void *)addr;
 }
 
-#include "vmregion.h"
-
-static struct arm_vmregion_head consistent_head = {
-	.vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
-	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
-	.vm_end = CONSISTENT_END,
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
+static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-	int ret = 0;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int i = 0;
-	unsigned long base = consistent_base;
-	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
-
-	if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
-		return 0;
-
-	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
-	if (!consistent_pte) {
-		pr_err("%s: no memory\n", __func__);
-		return -ENOMEM;
+	unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
+	struct vm_struct *area = find_vm_area(cpu_addr);
+	if (!area || (area->flags & flags) != flags) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
 	}
-
-	pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
-	consistent_head.vm_start = base;
-
-	do {
-		pgd = pgd_offset(&init_mm, base);
-
-		pud = pud_alloc(&init_mm, pgd, base);
-		if (!pud) {
-			pr_err("%s: no pud tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		pmd = pmd_alloc(&init_mm, pud, base);
-		if (!pmd) {
-			pr_err("%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-		WARN_ON(!pmd_none(*pmd));
-
-		pte = pte_alloc_kernel(pmd, base);
-		if (!pte) {
-			pr_err("%s: no pte tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		consistent_pte[i++] = pte;
-		base += PMD_SIZE;
-	} while (base < CONSISTENT_END);
-
-	return ret;
+	unmap_kernel_range((unsigned long)cpu_addr, size);
+	vunmap(cpu_addr);
 }
-core_initcall(consistent_init);
 
-static void *__alloc_from_contiguous(struct device *dev, size_t size,
-				     pgprot_t prot, struct page **ret_page);
-
-static struct arm_vmregion_head coherent_head = {
-	.vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
-	.vm_list = LIST_HEAD_INIT(coherent_head.vm_list),
+struct dma_pool {
+	size_t size;
+	spinlock_t lock;
+	unsigned long *bitmap;
+	unsigned long nr_pages;
+	void *vaddr;
+	struct page *page;
 };
 
-static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+static struct dma_pool atomic_pool = {
+	.size = SZ_256K,
+};
 
 static int __init early_coherent_pool(char *p)
 {
-	coherent_pool_size = memparse(p, &p);
+	atomic_pool.size = memparse(p, &p);
 	return 0;
 }
 early_param("coherent_pool", early_coherent_pool);
@@ -333,32 +290,45 @@ early_param("coherent_pool", early_coherent_pool);
 /*
  * Initialise the coherent pool for atomic allocations.
  */
-static int __init coherent_init(void)
+static int __init atomic_pool_init(void)
 {
+	struct dma_pool *pool = &atomic_pool;
 	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
-	size_t size = coherent_pool_size;
+	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
+	unsigned long *bitmap;
 	struct page *page;
 	void *ptr;
+	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
 
-	if (!IS_ENABLED(CONFIG_CMA))
-		return 0;
+	bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!bitmap)
+		goto no_bitmap;
 
-	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+	if (IS_ENABLED(CONFIG_CMA))
+		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
+	else
+		ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
+					   &page, NULL);
 	if (ptr) {
-		coherent_head.vm_start = (unsigned long) ptr;
-		coherent_head.vm_end = (unsigned long) ptr + size;
-		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
-		       (unsigned)size / 1024);
+		spin_lock_init(&pool->lock);
+		pool->vaddr = ptr;
+		pool->page = page;
+		pool->bitmap = bitmap;
+		pool->nr_pages = nr_pages;
+		pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+			(unsigned)pool->size / 1024);
 		return 0;
 	}
-	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
-	       (unsigned)size / 1024);
+	kfree(bitmap);
+no_bitmap:
+	pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+	       (unsigned)pool->size / 1024);
 	return -ENOMEM;
 }
 /*
  * CMA is activated by core_initcall, so we must be called after it.
  */
-postcore_initcall(coherent_init);
+postcore_initcall(atomic_pool_init);
 
 struct dma_contig_early_reserve {
 	phys_addr_t base;
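The new atomic_pool_init() sizes its allocation bitmap from the pool size: one bit per page, rounded up to whole longs. The following is a minimal user-space sketch of that arithmetic only; PAGE_SHIFT and BITS_TO_LONGS below are local stand-ins for the kernel macros, not the kernel's definitions.

```c
#include <stdio.h>

/* Illustrative stand-ins for the kernel's PAGE_SHIFT/BITS_TO_LONGS. */
#define PAGE_SHIFT		12		/* 4 KiB pages */
#define BITS_PER_LONG		(8 * sizeof(long))
#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	unsigned long pool_size = 256 * 1024;	/* SZ_256K default */
	unsigned long nr_pages = pool_size >> PAGE_SHIFT;
	unsigned long bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);

	/* 64 pages end up tracked by an 8-byte bitmap. */
	printf("%lu pages tracked by a %lu-byte bitmap\n", nr_pages, bitmap_size);
	return 0;
}
```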
@@ -388,7 +358,7 @@ void __init dma_contiguous_remap(void)
 		if (end > arm_lowmem_limit)
 			end = arm_lowmem_limit;
 		if (start >= end)
-			return;
+			continue;
 
 		map.pfn = __phys_to_pfn(start);
 		map.virtual = __phys_to_virt(start);
@@ -406,112 +376,6 @@ void __init dma_contiguous_remap(void)
 	}
 }
 
-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
-	const void *caller)
-{
-	struct arm_vmregion *c;
-	size_t align;
-	int bit;
-
-	if (!consistent_pte) {
-		pr_err("%s: not initialised\n", __func__);
-		dump_stack();
-		return NULL;
-	}
-
-	/*
-	 * Align the virtual region allocation - maximum alignment is
-	 * a section size, minimum is a page size.  This helps reduce
-	 * fragmentation of the DMA space, and also prevents allocations
-	 * smaller than a section from crossing a section boundary.
-	 */
-	bit = fls(size - 1);
-	if (bit > SECTION_SHIFT)
-		bit = SECTION_SHIFT;
-	align = 1 << bit;
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = arm_vmregion_alloc(&consistent_head, align, size,
-			       gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
-	if (c) {
-		pte_t *pte;
-		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-		pte = consistent_pte[idx] + off;
-		c->priv = page;
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			set_pte_ext(pte, mk_pte(page, prot), 0);
-			page++;
-			pte++;
-			off++;
-			if (off >= PTRS_PER_PTE) {
-				off = 0;
-				pte = consistent_pte[++idx];
-			}
-		} while (size -= PAGE_SIZE);
-
-		dsb();
-
-		return (void *)c->vm_start;
-	}
-	return NULL;
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
-	struct arm_vmregion *c;
-	unsigned long addr;
-	pte_t *ptep;
-	int idx;
-	u32 off;
-
-	c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
-	if (!c) {
-		pr_err("%s: trying to free invalid coherent area: %p\n",
-		       __func__, cpu_addr);
-		dump_stack();
-		return;
-	}
-
-	if ((c->vm_end - c->vm_start) != size) {
-		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
-	}
-
-	idx = CONSISTENT_PTE_INDEX(c->vm_start);
-	off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-	ptep = consistent_pte[idx] + off;
-	addr = c->vm_start;
-	do {
-		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-
-		ptep++;
-		addr += PAGE_SIZE;
-		off++;
-		if (off >= PTRS_PER_PTE) {
-			off = 0;
-			ptep = consistent_pte[++idx];
-		}
-
-		if (pte_none(pte) || !pte_present(pte))
-			pr_crit("%s: bad page in kernel page table\n",
-				__func__);
-	} while (size -= PAGE_SIZE);
-
-	flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-	arm_vmregion_free(&consistent_head, c);
-}
-
 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
 			    void *data)
 {
@@ -552,16 +416,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 	return ptr;
 }
 
-static void *__alloc_from_pool(struct device *dev, size_t size,
-			       struct page **ret_page, const void *caller)
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
 {
-	struct arm_vmregion *c;
-	size_t align;
+	struct dma_pool *pool = &atomic_pool;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int pageno;
+	unsigned long flags;
+	void *ptr = NULL;
+	unsigned long align_mask;
 
-	if (!coherent_head.vm_start) {
-		printk(KERN_ERR "%s: coherent pool not initialised!\n",
-		       __func__);
-		dump_stack();
+	if (!pool->vaddr) {
+		WARN(1, "coherent pool not initialised!\n");
 		return NULL;
 	}
 
@@ -570,36 +435,42 @@ static void *__alloc_from_pool(struct device *dev, size_t size,
 	 * small, so align them to their order in pages, minimum is a page
 	 * size. This helps reduce fragmentation of the DMA space.
 	 */
-	align = PAGE_SIZE << get_order(size);
-	c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
-	if (c) {
-		void *ptr = (void *)c->vm_start;
-		struct page *page = virt_to_page(ptr);
-		*ret_page = page;
-		return ptr;
+	align_mask = (1 << get_order(size)) - 1;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
+					    0, count, align_mask);
+	if (pageno < pool->nr_pages) {
+		bitmap_set(pool->bitmap, pageno, count);
+		ptr = pool->vaddr + PAGE_SIZE * pageno;
+		*ret_page = pool->page + pageno;
 	}
-	return NULL;
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return ptr;
 }
 
-static int __free_from_pool(void *cpu_addr, size_t size)
+static int __free_from_pool(void *start, size_t size)
 {
-	unsigned long start = (unsigned long)cpu_addr;
-	unsigned long end = start + size;
-	struct arm_vmregion *c;
+	struct dma_pool *pool = &atomic_pool;
+	unsigned long pageno, count;
+	unsigned long flags;
 
-	if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+	if (start < pool->vaddr || start > pool->vaddr + pool->size)
 		return 0;
 
-	c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
-
-	if ((c->vm_end - c->vm_start) != size) {
-		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
+	if (start + size > pool->vaddr + pool->size) {
+		WARN(1, "freeing wrong coherent size from pool\n");
+		return 0;
 	}
 
-	arm_vmregion_free(&coherent_head, c);
+	pageno = (start - pool->vaddr) >> PAGE_SHIFT;
+	count = size >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	bitmap_clear(pool->bitmap, pageno, count);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
 	return 1;
 }
 
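For reference, here is a self-contained user-space model of the bitmap strategy the reworked __alloc_from_pool()/__free_from_pool() use: find a run of free page-sized slots whose start is aligned to the allocation order, mark it, and hand back an offset into a preallocated buffer. All names (mini_pool, pool_alloc, pool_free) are illustrative only; the kernel versions additionally take the pool spinlock and use bitmap_find_next_zero_area() on a packed bit array.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096UL
#define POOL_PAGES	64		/* 256 KiB pool, as in the patch */

struct mini_pool {
	unsigned char	bitmap[POOL_PAGES];	/* one byte per page: 0 = free */
	unsigned char	*vaddr;			/* preallocated backing memory */
};

/* Smallest power-of-two page count covering size (get_order() analogue). */
static unsigned long order_pages(size_t size)
{
	unsigned long pages = 1;

	while (pages * PAGE_SIZE < size)
		pages <<= 1;
	return pages;
}

static void *pool_alloc(struct mini_pool *pool, size_t size)
{
	unsigned long count = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long align = order_pages(size);	/* start-index alignment */
	unsigned long start, i;

	for (start = 0; start + count <= POOL_PAGES; start += align) {
		for (i = 0; i < count; i++)
			if (pool->bitmap[start + i])
				break;
		if (i == count) {			/* free, aligned run found */
			memset(&pool->bitmap[start], 1, count);
			return pool->vaddr + start * PAGE_SIZE;
		}
	}
	return NULL;					/* pool exhausted */
}

static void pool_free(struct mini_pool *pool, void *ptr, size_t size)
{
	unsigned long start = ((unsigned char *)ptr - pool->vaddr) / PAGE_SIZE;
	unsigned long count = (size + PAGE_SIZE - 1) / PAGE_SIZE;

	memset(&pool->bitmap[start], 0, count);
}

int main(void)
{
	struct mini_pool pool = { .vaddr = malloc(POOL_PAGES * PAGE_SIZE) };
	void *a = pool_alloc(&pool, 3 * PAGE_SIZE);
	void *b = pool_alloc(&pool, PAGE_SIZE);

	if (!a || !b)
		return 1;
	/* a lands at page 0 (order-4 aligned), b at the next free page. */
	printf("a at page %lu, b at page %lu\n",
	       (unsigned long)(((unsigned char *)a - pool.vaddr) / PAGE_SIZE),
	       (unsigned long)(((unsigned char *)b - pool.vaddr) / PAGE_SIZE));
	pool_free(&pool, a, 3 * PAGE_SIZE);
	pool_free(&pool, b, PAGE_SIZE);
	free(pool.vaddr);
	return 0;
}
```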
@@ -644,7 +515,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 
 #define __get_dma_pgprot(attrs, prot) __pgprot(0)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL
-#define __alloc_from_pool(dev, size, ret_page, c) NULL
+#define __alloc_from_pool(size, ret_page) NULL
 #define __alloc_from_contiguous(dev, size, prot, ret) NULL
 #define __free_from_pool(cpu_addr, size) 0
 #define __free_from_contiguous(dev, page, size) do { } while (0)
@@ -702,10 +573,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	if (arch_is_coherent() || nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else if (gfp & GFP_ATOMIC)
+		addr = __alloc_from_pool(size, &page);
 	else if (!IS_ENABLED(CONFIG_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
-	else if (gfp & GFP_ATOMIC)
-		addr = __alloc_from_pool(dev, size, &page, caller);
 	else
 		addr = __alloc_from_contiguous(dev, size, prot, &page);
 
@@ -741,16 +612,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
 	int ret = -ENXIO;
 #ifdef CONFIG_MMU
+	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
+	unsigned long off = vma->vm_pgoff;
+
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 
 	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
-	ret = remap_pfn_range(vma, vma->vm_start,
-			      pfn + vma->vm_pgoff,
-			      vma->vm_end - vma->vm_start,
-			      vma->vm_page_prot);
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
 #endif	/* CONFIG_MMU */
 
 	return ret;
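The new bounds check in arm_dma_mmap() refuses mappings whose page offset or length would spill past the DMA buffer. A small sketch of that arithmetic, with mmap_fits() as a hypothetical name for illustration:

```c
#include <stdbool.h>
#include <stdio.h>

/*
 * A user mapping of nr_vma_pages pages starting at page offset 'off' must
 * fit entirely inside the nr_pages-long DMA buffer, mirroring the check
 * added to arm_dma_mmap().
 */
static bool mmap_fits(unsigned long nr_pages, unsigned long off,
		      unsigned long nr_vma_pages)
{
	return off < nr_pages && nr_vma_pages <= (nr_pages - off);
}

int main(void)
{
	/* 16-page buffer: mapping 8 pages at offset 8 fits, 9 pages does not. */
	printf("%d %d\n", mmap_fits(16, 8, 8), mmap_fits(16, 8, 9));
	return 0;
}
```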
@@ -771,12 +648,12 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 
 	if (arch_is_coherent() || nommu()) {
 		__dma_free_buffer(page, size);
+	} else if (__free_from_pool(cpu_addr, size)) {
+		return;
 	} else if (!IS_ENABLED(CONFIG_CMA)) {
 		__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
-		if (__free_from_pool(cpu_addr, size))
-			return;
 		/*
 		 * Non-atomic allocations cannot be freed with IRQs disabled
 		 */
@@ -785,6 +662,21 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	}
 }
 
+int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+		 void *cpu_addr, dma_addr_t handle, size_t size,
+		 struct dma_attrs *attrs)
+{
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	int ret;
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return 0;
+}
+
 static void dma_cache_maint_page(struct page *page, unsigned long offset,
 	size_t size, enum dma_data_direction dir,
 	void (*op)(const void *, size_t, int))
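The new arm_dma_get_sgtable() describes a coherent buffer, which is physically contiguous on this path, as a scatter-gather table with a single entry. A rough user-space model of that bookkeeping follows; struct sg_entry and the sample values are stand-ins for illustration, not the kernel's struct scatterlist.

```c
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Stand-in for one scatterlist entry: a (page, offset, length) triple. */
struct sg_entry {
	unsigned long	pfn;		/* first page frame of the chunk */
	unsigned long	offset;
	unsigned long	length;
};

int main(void)
{
	unsigned long buf_pfn = 0x80000;	/* hypothetical DMA buffer */
	size_t size = 6000;			/* allocation size in bytes */
	struct sg_entry sg;

	/* One contiguous chunk -> one entry covering the whole buffer. */
	sg.pfn = buf_pfn;
	sg.offset = 0;
	sg.length = PAGE_ALIGN(size);

	printf("1 entry: pfn=%#lx len=%lu\n", sg.pfn, sg.length);
	return 0;
}
```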
@@ -998,9 +890,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
 
 static int __init dma_debug_do_init(void)
 {
-#ifdef CONFIG_MMU
-	arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif
 	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 	return 0;
 }
@@ -1088,10 +977,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
 
 	return pages;
 error:
-	while (--i)
+	while (i--)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
@@ -1106,7 +995,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
 	for (i = 0; i < count; i++)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
@@ -1117,61 +1006,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
  * Create a CPU mapping for a specified pages
  */
 static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+		    const void *caller)
 {
-	struct arm_vmregion *c;
-	size_t align;
-	size_t count = size >> PAGE_SHIFT;
-	int bit;
+	unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct vm_struct *area;
+	unsigned long p;
 
-	if (!consistent_pte[0]) {
-		pr_err("%s: not initialised\n", __func__);
-		dump_stack();
+	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+				  caller);
+	if (!area)
 		return NULL;
-	}
-
-	/*
-	 * Align the virtual region allocation - maximum alignment is
-	 * a section size, minimum is a page size.  This helps reduce
-	 * fragmentation of the DMA space, and also prevents allocations
-	 * smaller than a section from crossing a section boundary.
-	 */
-	bit = fls(size - 1);
-	if (bit > SECTION_SHIFT)
-		bit = SECTION_SHIFT;
-	align = 1 << bit;
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = arm_vmregion_alloc(&consistent_head, align, size,
-			       gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
-	if (c) {
-		pte_t *pte;
-		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-		int i = 0;
-		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-		pte = consistent_pte[idx] + off;
-		c->priv = pages;
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			set_pte_ext(pte, mk_pte(pages[i], prot), 0);
-			pte++;
-			off++;
-			i++;
-			if (off >= PTRS_PER_PTE) {
-				off = 0;
-				pte = consistent_pte[++idx];
-			}
-		} while (i < count);
 
-		dsb();
+	area->pages = pages;
+	area->nr_pages = nr_pages;
+	p = (unsigned long)area->addr;
 
-		return (void *)c->vm_start;
+	for (i = 0; i < nr_pages; i++) {
+		phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i]));
+		if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot))
+			goto err;
+		p += PAGE_SIZE;
 	}
+	return area->addr;
+err:
+	unmap_kernel_range((unsigned long)area->addr, size);
+	vunmap(area->addr);
 	return NULL;
 }
 
@@ -1230,6 +1090,19 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
 	return 0;
 }
 
+static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+{
+	struct vm_struct *area;
+
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return cpu_addr;
+
+	area = find_vm_area(cpu_addr);
+	if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+		return area->pages;
+	return NULL;
+}
+
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1248,7 +1121,11 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	if (*handle == DMA_ERROR_CODE)
 		goto err_buffer;
 
-	addr = __iommu_alloc_remap(pages, size, gfp, prot);
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return pages;
+
+	addr = __iommu_alloc_remap(pages, size, gfp, prot,
+				   __builtin_return_address(0));
 	if (!addr)
 		goto err_mapping;
 
@@ -1265,31 +1142,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		    struct dma_attrs *attrs)
 {
-	struct arm_vmregion *c;
+	unsigned long uaddr = vma->vm_start;
+	unsigned long usize = vma->vm_end - vma->vm_start;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
 
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-
-	if (c) {
-		struct page **pages = c->priv;
-
-		unsigned long uaddr = vma->vm_start;
-		unsigned long usize = vma->vm_end - vma->vm_start;
-		int i = 0;
 
-		do {
-			int ret;
+	if (!pages)
+		return -ENXIO;
 
-			ret = vm_insert_page(vma, uaddr, pages[i++]);
-			if (ret) {
-				pr_err("Remapping memory, error: %d\n", ret);
-				return ret;
-			}
+	do {
+		int ret = vm_insert_page(vma, uaddr, *pages++);
+		if (ret) {
+			pr_err("Remapping memory failed: %d\n", ret);
+			return ret;
+		}
+		uaddr += PAGE_SIZE;
+		usize -= PAGE_SIZE;
+	} while (usize > 0);
 
-			uaddr += PAGE_SIZE;
-			usize -= PAGE_SIZE;
-		} while (usize > 0);
-	}
 	return 0;
 }
 
@@ -1300,16 +1171,35 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			  dma_addr_t handle, struct dma_attrs *attrs)
 {
-	struct arm_vmregion *c;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
 	size = PAGE_ALIGN(size);
 
-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-	if (c) {
-		struct page **pages = c->priv;
-		__dma_free_remap(cpu_addr, size);
-		__iommu_remove_mapping(dev, handle, size);
-		__iommu_free_buffer(dev, pages, size);
+	if (!pages) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
 	}
+
+	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+		unmap_kernel_range((unsigned long)cpu_addr, size);
+		vunmap(cpu_addr);
+	}
+
+	__iommu_remove_mapping(dev, handle, size);
+	__iommu_free_buffer(dev, pages, size);
+}
+
+static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+				 void *cpu_addr, dma_addr_t dma_addr,
+				 size_t size, struct dma_attrs *attrs)
+{
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+
+	if (!pages)
+		return -ENXIO;
+
+	return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
+					 GFP_KERNEL);
 }
 
 /*
@@ -1317,7 +1207,7 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
  */
 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 			  size_t size, dma_addr_t *handle,
-			  enum dma_data_direction dir)
+			  enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 	dma_addr_t iova, iova_base;
@@ -1336,7 +1226,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 		phys_addr_t phys = page_to_phys(sg_page(s));
 		unsigned int len = PAGE_ALIGN(s->offset + s->length);
 
-		if (!arch_is_coherent())
+		if (!arch_is_coherent() &&
+		    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
 
 		ret = iommu_map(mapping->domain, iova, phys, len, 0);
@@ -1383,7 +1274,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
 			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
-			    dir) < 0)
+			    dir, attrs) < 0)
 				goto bad_mapping;
 
 			dma->dma_address += offset;
@@ -1396,7 +1287,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		}
 		size += s->length;
 	}
-	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
+	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0)
 		goto bad_mapping;
 
 	dma->dma_address += offset;
@@ -1430,7 +1321,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		if (sg_dma_len(s))
 			__iommu_remove_mapping(dev, sg_dma_address(s),
 					       sg_dma_len(s));
-		if (!arch_is_coherent())
+		if (!arch_is_coherent() &&
+		    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 			__dma_page_dev_to_cpu(sg_page(s), s->offset,
 					      s->length, dir);
 	}
@@ -1492,7 +1384,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
 	dma_addr_t dma_addr;
 	int ret, len = PAGE_ALIGN(size + offset);
 
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 
 	dma_addr = __alloc_iova(mapping, len);
@@ -1531,7 +1423,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 	if (!iova)
 		return;
 
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_dev_to_cpu(page, offset, size, dir);
 
 	iommu_unmap(mapping->domain, iova, len);
@@ -1571,6 +1463,7 @@ struct dma_map_ops iommu_ops = {
 	.alloc = arm_iommu_alloc_attrs,
 	.free = arm_iommu_free_attrs,
 	.mmap = arm_iommu_mmap_attrs,
+	.get_sgtable = arm_iommu_get_sgtable,
 
 	.map_page = arm_iommu_map_page,
 	.unmap_page = arm_iommu_unmap_page,