author     Marek Szyprowski <m.szyprowski@samsung.com>   2011-12-29 07:09:51 -0500
committer  Marek Szyprowski <m.szyprowski@samsung.com>   2012-05-21 09:09:38 -0400
commit     c79095092834a18ae74cfc08def1a5a101dc106c (patch)
tree       c6cd81c38b92dcdb269288ab9a125bc13f4bb339 /arch/arm/mm/dma-mapping.c
parent     0a2b9a6ea93650b8a00f9fd5ee8fdd25671e2df6 (diff)
ARM: integrate CMA with DMA-mapping subsystem
This patch adds support for CMA to the DMA-mapping subsystem for the ARM
architecture. By default a global CMA area is used, but specific devices
are allowed to have their private memory areas if required (they can be
created with the dma_declare_contiguous() function during board
initialisation).

Contiguous memory areas reserved for DMA are remapped with 2-level page
tables on boot. Once a buffer is requested, the low-memory kernel mapping
is updated to match the requested memory access type. GFP_ATOMIC
allocations are performed from a special pool which is created early
during boot, so that remapping of page attributes is not needed at
allocation time.

CMA has been enabled unconditionally for ARMv6+ systems.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
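As a rough illustration of the per-device reservation mentioned above, a board
file could reserve a private CMA area from its early reserve hook roughly as
sketched below. This is not part of the patch: the device, the 16 MiB size and
the error handling are made up for illustration; only dma_declare_contiguous()
itself comes from the CMA series.

/*
 * Hypothetical board-code sketch (illustration only, not from this patch):
 * reserve a private contiguous area for a camera device during early boot,
 * before the page allocator is up.
 */
#include <linux/kernel.h>
#include <linux/dma-contiguous.h>
#include <linux/platform_device.h>

/* assumed to be defined elsewhere in the board support code */
extern struct platform_device board_camera_device;

static void __init board_reserve(void)
{
	/*
	 * base = 0 and limit = 0 let CMA place the 16 MiB area anywhere
	 * in lowmem; a non-zero base/limit would pin or bound it.
	 */
	if (dma_declare_contiguous(&board_camera_device.dev,
				   16 * 1024 * 1024, 0, 0))
		pr_warn("camera: private CMA area reservation failed\n");
}

/* hooked up via the machine descriptor, e.g. .reserve = board_reserve */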
Diffstat (limited to 'arch/arm/mm/dma-mapping.c')
-rw-r--r--   arch/arm/mm/dma-mapping.c   370
1 file changed, 303 insertions(+), 67 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index db23ae4aaaab..302f5bfb17f4 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,7 +17,9 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
 #include <linux/highmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/memory.h>
@@ -26,6 +28,9 @@
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
 #include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/system_info.h>
+#include <asm/dma-contiguous.h>
 
 #include "mm.h"
 
@@ -56,6 +61,19 @@ static u64 get_coherent_dma_mask(struct device *dev)
 	return mask;
 }
 
+static void __dma_clear_buffer(struct page *page, size_t size)
+{
+	void *ptr;
+	/*
+	 * Ensure that the allocated pages are zeroed, and that any data
+	 * lurking in the kernel direct-mapped region is invalidated.
+	 */
+	ptr = page_address(page);
+	memset(ptr, 0, size);
+	dmac_flush_range(ptr, ptr + size);
+	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+}
+
 /*
  * Allocate a DMA buffer for 'dev' of size 'size' using the
  * specified gfp mask. Note that 'size' must be page aligned.
@@ -64,23 +82,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 {
 	unsigned long order = get_order(size);
 	struct page *page, *p, *e;
-	void *ptr;
-	u64 mask = get_coherent_dma_mask(dev);
-
-#ifdef CONFIG_DMA_API_DEBUG
-	u64 limit = (mask + 1) & ~mask;
-	if (limit && size >= limit) {
-		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
-			 size, mask);
-		return NULL;
-	}
-#endif
-
-	if (!mask)
-		return NULL;
-
-	if (mask < 0xffffffffULL)
-		gfp |= GFP_DMA;
 
 	page = alloc_pages(gfp, order);
 	if (!page)
@@ -93,14 +94,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 		__free_page(p);
 
-	/*
-	 * Ensure that the allocated pages are zeroed, and that any data
-	 * lurking in the kernel direct-mapped region is invalidated.
-	 */
-	ptr = page_address(page);
-	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
-	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	__dma_clear_buffer(page, size);
 
 	return page;
 }
@@ -170,6 +164,9 @@ static int __init consistent_init(void)
 	unsigned long base = consistent_base;
 	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
 
+	if (cpu_architecture() >= CPU_ARCH_ARMv6)
+		return 0;
+
 	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
 	if (!consistent_pte) {
 		pr_err("%s: no memory\n", __func__);
@@ -210,9 +207,101 @@ static int __init consistent_init(void)
 
 	return ret;
 }
-
 core_initcall(consistent_init);
 
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page);
+
+static struct arm_vmregion_head coherent_head = {
+	.vm_lock	= __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
+	.vm_list	= LIST_HEAD_INIT(coherent_head.vm_list),
+};
+
+size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+
+static int __init early_coherent_pool(char *p)
+{
+	coherent_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+/*
+ * Initialise the coherent pool for atomic allocations.
+ */
+static int __init coherent_init(void)
+{
+	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
+	size_t size = coherent_pool_size;
+	struct page *page;
+	void *ptr;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return 0;
+
+	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+	if (ptr) {
+		coherent_head.vm_start = (unsigned long) ptr;
+		coherent_head.vm_end = (unsigned long) ptr + size;
+		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+		       (unsigned)size / 1024);
+		return 0;
+	}
+	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+	       (unsigned)size / 1024);
+	return -ENOMEM;
+}
+/*
+ * CMA is activated by core_initcall, so we must be called after it.
+ */
+postcore_initcall(coherent_init);
+
+struct dma_contig_early_reserve {
+	phys_addr_t base;
+	unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
+
+static int dma_mmu_remap_num __initdata;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
+
+void __init dma_contiguous_remap(void)
+{
+	int i;
+	for (i = 0; i < dma_mmu_remap_num; i++) {
+		phys_addr_t start = dma_mmu_remap[i].base;
+		phys_addr_t end = start + dma_mmu_remap[i].size;
+		struct map_desc map;
+		unsigned long addr;
+
+		if (end > arm_lowmem_limit)
+			end = arm_lowmem_limit;
+		if (start >= end)
+			return;
+
+		map.pfn = __phys_to_pfn(start);
+		map.virtual = __phys_to_virt(start);
+		map.length = end - start;
+		map.type = MT_MEMORY_DMA_READY;
+
+		/*
+		 * Clear previous low-memory mapping
+		 */
+		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
+		     addr += PGDIR_SIZE)
+			pmd_clear(pmd_off_k(addr));
+
+		iotable_init(&map, 1);
+	}
+}
+
 static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 		  const void *caller)
@@ -319,20 +408,173 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
 	arm_vmregion_free(&consistent_head, c);
 }
 
+static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
+			    void *data)
+{
+	struct page *page = virt_to_page(addr);
+	pgprot_t prot = *(pgprot_t *)data;
+
+	set_pte_ext(pte, mk_pte(page, prot), 0);
+	return 0;
+}
+
+static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
+{
+	unsigned long start = (unsigned long) page_address(page);
+	unsigned end = start + size;
+
+	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
+	dsb();
+	flush_tlb_kernel_range(start, end);
+}
+
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				 pgprot_t prot, struct page **ret_page,
+				 const void *caller)
+{
+	struct page *page;
+	void *ptr;
+	page = __dma_alloc_buffer(dev, size, gfp);
+	if (!page)
+		return NULL;
+
+	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	if (!ptr) {
+		__dma_free_buffer(page, size);
+		return NULL;
+	}
+
+	*ret_page = page;
+	return ptr;
+}
+
+static void *__alloc_from_pool(struct device *dev, size_t size,
+			       struct page **ret_page, const void *caller)
+{
+	struct arm_vmregion *c;
+	size_t align;
+
+	if (!coherent_head.vm_start) {
+		printk(KERN_ERR "%s: coherent pool not initialised!\n",
+		       __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	/*
+	 * Align the region allocation - allocations from pool are rather
+	 * small, so align them to their order in pages, minimum is a page
+	 * size. This helps reduce fragmentation of the DMA space.
+	 */
+	align = PAGE_SIZE << get_order(size);
+	c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
+	if (c) {
+		void *ptr = (void *)c->vm_start;
+		struct page *page = virt_to_page(ptr);
+		*ret_page = page;
+		return ptr;
+	}
+	return NULL;
+}
+
+static int __free_from_pool(void *cpu_addr, size_t size)
+{
+	unsigned long start = (unsigned long)cpu_addr;
+	unsigned long end = start + size;
+	struct arm_vmregion *c;
+
+	if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+		return 0;
+
+	c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
+
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	arm_vmregion_free(&coherent_head, c);
+	return 1;
+}
+
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page)
+{
+	unsigned long order = get_order(size);
+	size_t count = size >> PAGE_SHIFT;
+	struct page *page;
+
+	page = dma_alloc_from_contiguous(dev, count, order);
+	if (!page)
+		return NULL;
+
+	__dma_clear_buffer(page, size);
+	__dma_remap(page, size, prot);
+
+	*ret_page = page;
+	return page_address(page);
+}
+
+static void __free_from_contiguous(struct device *dev, struct page *page,
+				   size_t size)
+{
+	__dma_remap(page, size, pgprot_kernel);
+	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
+}
+
+#define nommu()			0
+
 #else	/* !CONFIG_MMU */
 
-#define __dma_alloc_remap(page, size, gfp, prot, c)	page_address(page)
-#define __dma_free_remap(addr, size)			do { } while (0)
+#define nommu()			1
+
+#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)	NULL
+#define __alloc_from_pool(dev, size, ret_page, c)		NULL
+#define __alloc_from_contiguous(dev, size, prot, ret)		NULL
+#define __free_from_pool(cpu_addr, size)			0
+#define __free_from_contiguous(dev, page, size)		do { } while (0)
+#define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
 #endif	/* CONFIG_MMU */
 
-static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
-	    pgprot_t prot, const void *caller)
+static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
+				   struct page **ret_page)
 {
 	struct page *page;
+	page = __dma_alloc_buffer(dev, size, gfp);
+	if (!page)
+		return NULL;
+
+	*ret_page = page;
+	return page_address(page);
+}
+
+
+
+static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+			 gfp_t gfp, pgprot_t prot, const void *caller)
+{
+	u64 mask = get_coherent_dma_mask(dev);
+	struct page *page;
 	void *addr;
 
+#ifdef CONFIG_DMA_API_DEBUG
+	u64 limit = (mask + 1) & ~mask;
+	if (limit && size >= limit) {
+		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
+			size, mask);
+		return NULL;
+	}
+#endif
+
+	if (!mask)
+		return NULL;
+
+	if (mask < 0xffffffffULL)
+		gfp |= GFP_DMA;
+
 	/*
 	 * Following is a work-around (a.k.a. hack) to prevent pages
 	 * with __GFP_COMP being passed to split_page() which cannot
@@ -345,19 +587,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 	*handle = ~0;
 	size = PAGE_ALIGN(size);
 
-	page = __dma_alloc_buffer(dev, size, gfp);
-	if (!page)
-		return NULL;
-
-	if (!arch_is_coherent())
-		addr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	if (arch_is_coherent() || nommu())
+		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else if (cpu_architecture() < CPU_ARCH_ARMv6)
+		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
+	else if (gfp & GFP_ATOMIC)
+		addr = __alloc_from_pool(dev, size, &page, caller);
 	else
-		addr = page_address(page);
+		addr = __alloc_from_contiguous(dev, size, prot, &page);
 
 	if (addr)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
-	else
-		__dma_free_buffer(page, size);
 
 	return addr;
 }
@@ -366,8 +606,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
+void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle,
+			 gfp_t gfp)
 {
 	void *memory;
 
@@ -398,25 +638,11 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
 	int ret = -ENXIO;
 #ifdef CONFIG_MMU
-	unsigned long user_size, kern_size;
-	struct arm_vmregion *c;
-
-	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-
-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-	if (c) {
-		unsigned long off = vma->vm_pgoff;
-
-		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
-
-		if (off < kern_size &&
-		    user_size <= (kern_size - off)) {
-			ret = remap_pfn_range(vma, vma->vm_start,
-					      page_to_pfn(c->vm_pages) + off,
-					      user_size << PAGE_SHIFT,
-					      vma->vm_page_prot);
-		}
-	}
+	unsigned long pfn = dma_to_pfn(dev, dma_addr);
+	ret = remap_pfn_range(vma, vma->vm_start,
+			      pfn + vma->vm_pgoff,
+			      vma->vm_end - vma->vm_start,
+			      vma->vm_page_prot);
 #endif	/* CONFIG_MMU */
 
 	return ret;
@@ -438,23 +664,33 @@ int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL(dma_mmap_writecombine);
 
+
 /*
- * free a page as defined by the above mapping.
- * Must not be called with IRQs disabled.
+ * Free a buffer as defined by the above mapping.
  */
 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
 {
-	WARN_ON(irqs_disabled());
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
 
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
 
 	size = PAGE_ALIGN(size);
 
-	if (!arch_is_coherent())
+	if (arch_is_coherent() || nommu()) {
+		__dma_free_buffer(page, size);
+	} else if (cpu_architecture() < CPU_ARCH_ARMv6) {
 		__dma_free_remap(cpu_addr, size);
-
-	__dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
+		__dma_free_buffer(page, size);
+	} else {
+		if (__free_from_pool(cpu_addr, size))
+			return;
+		/*
+		 * Non-atomic allocations cannot be freed with IRQs disabled
+		 */
+		WARN_ON(irqs_disabled());
+		__free_from_contiguous(dev, page, size);
+	}
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
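For context, a hedged sketch of what the new allocation paths mean from a
driver's point of view: the dma_alloc_coherent()/dma_free_coherent() calls
below are the standard API and are unchanged by this patch; what changes on
ARMv6+ is where the buffer comes from (a CMA area for sleeping allocations,
the preallocated coherent pool for GFP_ATOMIC). The probe function name and
buffer size are made up for illustration.

/* Illustrative driver-side view (not part of the patch). */
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

#define EXAMPLE_BUF_SIZE	(64 * 1024)	/* made-up size */

static int example_probe(struct platform_device *pdev)
{
	dma_addr_t dma_handle;
	void *cpu_addr;

	/* May sleep: on ARMv6+ this is now served from a CMA area. */
	cpu_addr = dma_alloc_coherent(&pdev->dev, EXAMPLE_BUF_SIZE,
				      &dma_handle, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* ... hand dma_handle to the device, touch cpu_addr from the CPU ... */

	/* Returns the pages to CMA; must not be called with IRQs disabled. */
	dma_free_coherent(&pdev->dev, EXAMPLE_BUF_SIZE, cpu_addr, dma_handle);
	return 0;
}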