author     Marek Szyprowski <m.szyprowski@samsung.com>   2011-12-29 07:09:51 -0500
committer  Marek Szyprowski <m.szyprowski@samsung.com>   2012-05-21 09:09:38 -0400
commit     c79095092834a18ae74cfc08def1a5a101dc106c (patch)
tree       c6cd81c38b92dcdb269288ab9a125bc13f4bb339 /arch/arm/mm/dma-mapping.c
parent     0a2b9a6ea93650b8a00f9fd5ee8fdd25671e2df6 (diff)
ARM: integrate CMA with DMA-mapping subsystem
This patch adds support for CMA to the DMA-mapping subsystem for the ARM
architecture. By default a global CMA area is used, but specific devices
are allowed to have their private memory areas if required (they can be
created with the dma_declare_contiguous() function during board
initialisation).

Contiguous memory areas reserved for DMA are remapped with 2-level page
tables on boot. Once a buffer is requested, the low-memory kernel mapping
is updated to match the requested memory access type. GFP_ATOMIC
allocations are performed from a special pool which is created early
during boot, so that remapping of page attributes is not needed at
allocation time.

CMA has been enabled unconditionally for ARMv6+ systems.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
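As a rough illustration of the per-device reservation mentioned above, a board
file could reserve a private CMA area from its early reserve hook roughly as
sketched below. This is not part of the patch: the device, the 16 MiB size and
the error handling are made up for illustration; only dma_declare_contiguous()
itself comes from the CMA series.

/*
 * Hypothetical board-code sketch (illustration only, not from this patch):
 * reserve a private contiguous area for a camera device during early boot,
 * before the page allocator is up.
 */
#include <linux/kernel.h>
#include <linux/dma-contiguous.h>
#include <linux/platform_device.h>

/* assumed to be defined elsewhere in the board support code */
extern struct platform_device board_camera_device;

static void __init board_reserve(void)
{
	/*
	 * base = 0 and limit = 0 let CMA place the 16 MiB area anywhere
	 * in lowmem; a non-zero base/limit would pin or bound it.
	 */
	if (dma_declare_contiguous(&board_camera_device.dev,
				   16 * 1024 * 1024, 0, 0))
		pr_warn("camera: private CMA area reservation failed\n");
}

/* hooked up via the machine descriptor, e.g. .reserve = board_reserve */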
Diffstat (limited to 'arch/arm/mm/dma-mapping.c')
-rw-r--r--   arch/arm/mm/dma-mapping.c   370
1 file changed, 303 insertions(+), 67 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index db23ae4aaaab..302f5bfb17f4 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,7 +17,9 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
 #include <linux/highmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/memory.h>
@@ -26,6 +28,9 @@
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
 #include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/system_info.h>
+#include <asm/dma-contiguous.h>
 
 #include "mm.h"
 
@@ -56,6 +61,19 @@ static u64 get_coherent_dma_mask(struct device *dev)
 	return mask;
 }
 
+static void __dma_clear_buffer(struct page *page, size_t size)
+{
+	void *ptr;
+	/*
+	 * Ensure that the allocated pages are zeroed, and that any data
+	 * lurking in the kernel direct-mapped region is invalidated.
+	 */
+	ptr = page_address(page);
+	memset(ptr, 0, size);
+	dmac_flush_range(ptr, ptr + size);
+	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+}
+
 /*
  * Allocate a DMA buffer for 'dev' of size 'size' using the
  * specified gfp mask. Note that 'size' must be page aligned.
@@ -64,23 +82,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 {
 	unsigned long order = get_order(size);
 	struct page *page, *p, *e;
-	void *ptr;
-	u64 mask = get_coherent_dma_mask(dev);
-
-#ifdef CONFIG_DMA_API_DEBUG
-	u64 limit = (mask + 1) & ~mask;
-	if (limit && size >= limit) {
-		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
-			 size, mask);
-		return NULL;
-	}
-#endif
-
-	if (!mask)
-		return NULL;
-
-	if (mask < 0xffffffffULL)
-		gfp |= GFP_DMA;
 
 	page = alloc_pages(gfp, order);
 	if (!page)
@@ -93,14 +94,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 		__free_page(p);
 
-	/*
-	 * Ensure that the allocated pages are zeroed, and that any data
-	 * lurking in the kernel direct-mapped region is invalidated.
-	 */
-	ptr = page_address(page);
-	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
-	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	__dma_clear_buffer(page, size);
 
 	return page;
 }
@@ -170,6 +164,9 @@ static int __init consistent_init(void)
 	unsigned long base = consistent_base;
 	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
 
+	if (cpu_architecture() >= CPU_ARCH_ARMv6)
+		return 0;
+
 	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
 	if (!consistent_pte) {
 		pr_err("%s: no memory\n", __func__);
@@ -210,9 +207,101 @@ static int __init consistent_init(void)
 
 	return ret;
 }
-
 core_initcall(consistent_init);
 
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page);
+
+static struct arm_vmregion_head coherent_head = {
+	.vm_lock	= __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
+	.vm_list	= LIST_HEAD_INIT(coherent_head.vm_list),
+};
+
+size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+
+static int __init early_coherent_pool(char *p)
+{
+	coherent_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
+
+/*
+ * Initialise the coherent pool for atomic allocations.
+ */
+static int __init coherent_init(void)
+{
+	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
+	size_t size = coherent_pool_size;
+	struct page *page;
+	void *ptr;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return 0;
+
+	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+	if (ptr) {
+		coherent_head.vm_start = (unsigned long) ptr;
+		coherent_head.vm_end = (unsigned long) ptr + size;
+		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+		       (unsigned)size / 1024);
+		return 0;
+	}
+	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+	       (unsigned)size / 1024);
+	return -ENOMEM;
+}
+/*
+ * CMA is activated by core_initcall, so we must be called after it.
+ */
+postcore_initcall(coherent_init);
+
+struct dma_contig_early_reserve {
+	phys_addr_t base;
+	unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
+
+static int dma_mmu_remap_num __initdata;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
+
+void __init dma_contiguous_remap(void)
+{
+	int i;
+	for (i = 0; i < dma_mmu_remap_num; i++) {
+		phys_addr_t start = dma_mmu_remap[i].base;
+		phys_addr_t end = start + dma_mmu_remap[i].size;
+		struct map_desc map;
+		unsigned long addr;
+
+		if (end > arm_lowmem_limit)
+			end = arm_lowmem_limit;
+		if (start >= end)
+			return;
+
+		map.pfn = __phys_to_pfn(start);
+		map.virtual = __phys_to_virt(start);
+		map.length = end - start;
+		map.type = MT_MEMORY_DMA_READY;
+
+		/*
+		 * Clear previous low-memory mapping
+		 */
+		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
+		     addr += PGDIR_SIZE)
+			pmd_clear(pmd_off_k(addr));
+
+		iotable_init(&map, 1);
+	}
+}
+
 static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 		  const void *caller)
@@ -319,20 +408,173 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
 	arm_vmregion_free(&consistent_head, c);
 }
 
+static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
+			    void *data)
+{
+	struct page *page = virt_to_page(addr);
+	pgprot_t prot = *(pgprot_t *)data;
+
+	set_pte_ext(pte, mk_pte(page, prot), 0);
+	return 0;
+}
+
+static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
+{
+	unsigned long start = (unsigned long) page_address(page);
+	unsigned end = start + size;
+
+	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
+	dsb();
+	flush_tlb_kernel_range(start, end);
+}
+
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				 pgprot_t prot, struct page **ret_page,
+				 const void *caller)
+{
+	struct page *page;
+	void *ptr;
+	page = __dma_alloc_buffer(dev, size, gfp);
+	if (!page)
+		return NULL;
+
+	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	if (!ptr) {
+		__dma_free_buffer(page, size);
+		return NULL;
+	}
+
+	*ret_page = page;
+	return ptr;
+}
+
+static void *__alloc_from_pool(struct device *dev, size_t size,
+			       struct page **ret_page, const void *caller)
+{
+	struct arm_vmregion *c;
+	size_t align;
+
+	if (!coherent_head.vm_start) {
+		printk(KERN_ERR "%s: coherent pool not initialised!\n",
+		       __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	/*
+	 * Align the region allocation - allocations from pool are rather
+	 * small, so align them to their order in pages, minimum is a page
+	 * size. This helps reduce fragmentation of the DMA space.
+	 */
+	align = PAGE_SIZE << get_order(size);
+	c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
+	if (c) {
+		void *ptr = (void *)c->vm_start;
+		struct page *page = virt_to_page(ptr);
+		*ret_page = page;
+		return ptr;
+	}
+	return NULL;
+}
+
+static int __free_from_pool(void *cpu_addr, size_t size)
+{
+	unsigned long start = (unsigned long)cpu_addr;
+	unsigned long end = start + size;
+	struct arm_vmregion *c;
+
+	if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+		return 0;
+
+	c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
+
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	arm_vmregion_free(&coherent_head, c);
+	return 1;
+}
+
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page)
+{
+	unsigned long order = get_order(size);
+	size_t count = size >> PAGE_SHIFT;
+	struct page *page;
+
+	page = dma_alloc_from_contiguous(dev, count, order);
+	if (!page)
+		return NULL;
+
+	__dma_clear_buffer(page, size);
+	__dma_remap(page, size, prot);
+
+	*ret_page = page;
+	return page_address(page);
+}
+
+static void __free_from_contiguous(struct device *dev, struct page *page,
+				   size_t size)
+{
+	__dma_remap(page, size, pgprot_kernel);
+	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
+}
+
+#define nommu()			0
+
 #else	/* !CONFIG_MMU */
 
-#define __dma_alloc_remap(page, size, gfp, prot, c)	page_address(page)
-#define __dma_free_remap(addr, size)			do { } while (0)
+#define nommu()			1
+
+#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)	NULL
+#define __alloc_from_pool(dev, size, ret_page, c)		NULL
+#define __alloc_from_contiguous(dev, size, prot, ret)		NULL
+#define __free_from_pool(cpu_addr, size)			0
+#define __free_from_contiguous(dev, page, size)		do { } while (0)
+#define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
 #endif	/* CONFIG_MMU */
 
-static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
-	    pgprot_t prot, const void *caller)
+static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
+				   struct page **ret_page)
 {
 	struct page *page;
+	page = __dma_alloc_buffer(dev, size, gfp);
+	if (!page)
+		return NULL;
+
+	*ret_page = page;
+	return page_address(page);
+}
+
+
+
+static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+			 gfp_t gfp, pgprot_t prot, const void *caller)
+{
+	u64 mask = get_coherent_dma_mask(dev);
+	struct page *page;
 	void *addr;
 
+#ifdef CONFIG_DMA_API_DEBUG
+	u64 limit = (mask + 1) & ~mask;
+	if (limit && size >= limit) {
+		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
+			size, mask);
+		return NULL;
+	}
+#endif
+
+	if (!mask)
+		return NULL;
+
+	if (mask < 0xffffffffULL)
+		gfp |= GFP_DMA;
+
 	/*
 	 * Following is a work-around (a.k.a. hack) to prevent pages
 	 * with __GFP_COMP being passed to split_page() which cannot
@@ -345,19 +587,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 	*handle = ~0;
 	size = PAGE_ALIGN(size);
 
-	page = __dma_alloc_buffer(dev, size, gfp);
-	if (!page)
-		return NULL;
-
-	if (!arch_is_coherent())
-		addr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	if (arch_is_coherent() || nommu())
+		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else if (cpu_architecture() < CPU_ARCH_ARMv6)
+		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
+	else if (gfp & GFP_ATOMIC)
+		addr = __alloc_from_pool(dev, size, &page, caller);
 	else
-		addr = page_address(page);
+		addr = __alloc_from_contiguous(dev, size, prot, &page);
 
 	if (addr)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
-	else
-		__dma_free_buffer(page, size);
 
 	return addr;
 }
@@ -366,8 +606,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
+void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle,
+			 gfp_t gfp)
 {
 	void *memory;
 
@@ -398,25 +638,11 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
 	int ret = -ENXIO;
 #ifdef CONFIG_MMU
-	unsigned long user_size, kern_size;
-	struct arm_vmregion *c;
-
-	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-
-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-	if (c) {
-		unsigned long off = vma->vm_pgoff;
-
-		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
-
-		if (off < kern_size &&
-		    user_size <= (kern_size - off)) {
-			ret = remap_pfn_range(vma, vma->vm_start,
-					      page_to_pfn(c->vm_pages) + off,
-					      user_size << PAGE_SHIFT,
-					      vma->vm_page_prot);
-		}
-	}
+	unsigned long pfn = dma_to_pfn(dev, dma_addr);
+	ret = remap_pfn_range(vma, vma->vm_start,
+			      pfn + vma->vm_pgoff,
+			      vma->vm_end - vma->vm_start,
+			      vma->vm_page_prot);
 #endif	/* CONFIG_MMU */
 
 	return ret;
@@ -438,23 +664,33 @@ int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL(dma_mmap_writecombine);
 
+
 /*
- * free a page as defined by the above mapping.
- * Must not be called with IRQs disabled.
+ * Free a buffer as defined by the above mapping.
  */
 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
 {
-	WARN_ON(irqs_disabled());
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
 
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
 
 	size = PAGE_ALIGN(size);
 
-	if (!arch_is_coherent())
+	if (arch_is_coherent() || nommu()) {
+		__dma_free_buffer(page, size);
+	} else if (cpu_architecture() < CPU_ARCH_ARMv6) {
 		__dma_free_remap(cpu_addr, size);
-
-	__dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
+		__dma_free_buffer(page, size);
+	} else {
+		if (__free_from_pool(cpu_addr, size))
+			return;
+		/*
+		 * Non-atomic allocations cannot be freed with IRQs disabled
+		 */
+		WARN_ON(irqs_disabled());
+		__free_from_contiguous(dev, page, size);
+	}
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
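For context, a hedged sketch of what the new allocation paths mean from a
driver's point of view: the dma_alloc_coherent()/dma_free_coherent() calls
below are the standard API and are unchanged by this patch; what changes on
ARMv6+ is where the buffer comes from (a CMA area for sleeping allocations,
the preallocated coherent pool for GFP_ATOMIC). The probe function name and
buffer size are made up for illustration.

/* Illustrative driver-side view (not part of the patch). */
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

#define EXAMPLE_BUF_SIZE	(64 * 1024)	/* made-up size */

static int example_probe(struct platform_device *pdev)
{
	dma_addr_t dma_handle;
	void *cpu_addr;

	/* May sleep: on ARMv6+ this is now served from a CMA area. */
	cpu_addr = dma_alloc_coherent(&pdev->dev, EXAMPLE_BUF_SIZE,
				      &dma_handle, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* ... hand dma_handle to the device, touch cpu_addr from the CPU ... */

	/* Returns the pages to CMA; must not be called with IRQs disabled. */
	dma_free_coherent(&pdev->dev, EXAMPLE_BUF_SIZE, cpu_addr, dma_handle);
	return 0;
}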