metag: DMA

Add DMA mapping code.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
author: James Hogan <james.hogan@imgtec.com> 2012-10-05 11:27:03 -0400
committer: James Hogan <james.hogan@imgtec.com> 2013-03-02 15:09:51 -0500
commit: f507758ccbed5c354cc1ce3b8f53ea072d7bc222 (patch)
tree: dd474b63b194039b5c6c97790016f55a02a93643 /arch/metag
parent: 42682c6c42a5765b2c7cccfca170368fef6191ef (diff)
2 files changed, 690 insertions, 0 deletions
diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..b5f80a62fe8b
--- /dev/null
+++ b/arch/metag/include/asm/dma-mapping.h
@@ -0,0 +1,183 @@
+#ifndef _ASM_METAG_DMA_MAPPING_H
+#define _ASM_METAG_DMA_MAPPING_H
+#include <linux/mm.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <linux/scatterlist.h>
+#include <asm/bug.h>
+/*
+ * The noncoherent allocate/free variants simply forward to the coherent
+ * routines with the same arguments.
+ */
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+/* Out-of-line helpers: only the prototypes live in this header. */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                         dma_addr_t *dma_handle, gfp_t flag);
+void dma_free_coherent(struct device *dev, size_t size,
+                       void *vaddr, dma_addr_t dma_handle);
+/*
+ * Cache maintenance primitives used by the inline map/unmap/sync wrappers
+ * below; both take a virtual address, a length and a DMA direction.
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
+/* Map a coherent allocation into the user address range described by vma. */
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+                      void *cpu_addr, dma_addr_t dma_addr, size_t size);
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+                          void *cpu_addr, dma_addr_t dma_addr, size_t size);
+/*
+ * Map one buffer for DMA: make it consistent for the device, then return
+ * its physical address as the DMA handle.  An invalid direction is fatal;
+ * a zero size only triggers a warning.
+ */
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+               enum dma_data_direction direction)
+{
+        dma_addr_t handle;
+        BUG_ON(!valid_dma_direction(direction));
+        WARN_ON(size == 0);
+        /* Cache maintenance happens before the handle is produced. */
+        dma_sync_for_device(ptr, size, direction);
+        handle = virt_to_phys(ptr);
+        return handle;
+}
+/*
+ * Undo dma_map_single(): translate the handle back to a virtual address
+ * and make the buffer consistent for the CPU.
+ */
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+                 enum dma_data_direction direction)
+{
+        void *cpu_va;
+        BUG_ON(!valid_dma_direction(direction));
+        cpu_va = phys_to_virt(dma_addr);
+        dma_sync_for_cpu(cpu_va, size, direction);
+}
+/*
+ * Map every entry of a scatterlist for DMA.  Each entry gets its physical
+ * address stored in dma_address and is synced for the device.  Always
+ * returns nents.
+ */
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+           enum dma_data_direction direction)
+{
+        struct scatterlist *entry;
+        int idx;
+        BUG_ON(!valid_dma_direction(direction));
+        WARN_ON(nents == 0 || sglist[0].length == 0);
+        for_each_sg(sglist, entry, nents, idx) {
+                /* An entry without a backing page cannot be mapped. */
+                BUG_ON(!sg_page(entry));
+                entry->dma_address = sg_phys(entry);
+                dma_sync_for_device(sg_virt(entry), entry->length,
+                                    direction);
+        }
+        return nents;
+}
+/*
+ * Map part of a page for DMA and return its physical address as the DMA
+ * handle.
+ *
+ * dma_sync_for_device() takes a virtual address, so the physical address
+ * is translated with phys_to_virt() first, mirroring what dma_unmap_page()
+ * does on the way back.  Previously the raw physical address was cast to a
+ * pointer and handed to the cache maintenance code.
+ */
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+             size_t size, enum dma_data_direction direction)
+{
+        dma_addr_t handle = page_to_phys(page) + offset;
+        BUG_ON(!valid_dma_direction(direction));
+        dma_sync_for_device(phys_to_virt(handle), size, direction);
+        return handle;
+}
+/*
+ * Undo dma_map_page(): translate the handle back to a virtual address and
+ * make the buffer consistent for the CPU.
+ */
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+               enum dma_data_direction direction)
+{
+        void *cpu_va;
+        BUG_ON(!valid_dma_direction(direction));
+        cpu_va = phys_to_virt(dma_address);
+        dma_sync_for_cpu(cpu_va, size, direction);
+}
+/*
+ * Undo dma_map_sg(): walk the scatterlist, refresh each entry's
+ * dma_address from its physical address and sync it for the CPU.
+ */
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
+             enum dma_data_direction direction)
+{
+        struct scatterlist *entry;
+        int idx;
+        BUG_ON(!valid_dma_direction(direction));
+        WARN_ON(nhwentries == 0 || sglist[0].length == 0);
+        for_each_sg(sglist, entry, nhwentries, idx) {
+                /* An entry without a backing page is a caller bug. */
+                BUG_ON(!sg_page(entry));
+                entry->dma_address = sg_phys(entry);
+                dma_sync_for_cpu(sg_virt(entry), entry->length, direction);
+        }
+}
+/* Hand a single mapped buffer back to the CPU without unmapping it. */
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+                        enum dma_data_direction direction)
+{
+        void *cpu_va = phys_to_virt(dma_handle);
+        dma_sync_for_cpu(cpu_va, size, direction);
+}
+/* Hand a single mapped buffer back to the device after CPU access. */
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+                           size_t size, enum dma_data_direction direction)
+{
+        void *cpu_va = phys_to_virt(dma_handle);
+        dma_sync_for_device(cpu_va, size, direction);
+}
+/*
+ * Sync for the CPU a sub-range of a mapped buffer: size bytes starting
+ * offset bytes into it.
+ */
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+                              unsigned long offset, size_t size,
+                              enum dma_data_direction direction)
+{
+        void *start = phys_to_virt(dma_handle)+offset;
+        dma_sync_for_cpu(start, size, direction);
+}
+/*
+ * Sync for the device a sub-range of a mapped buffer: size bytes starting
+ * offset bytes into it.
+ */
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+                                 unsigned long offset, size_t size,
+                                 enum dma_data_direction direction)
+{
+        void *start = phys_to_virt(dma_handle)+offset;
+        dma_sync_for_device(start, size, direction);
+}
+/*
+ * Sync every entry of a mapped scatterlist for the CPU.
+ *
+ * The list is walked with for_each_sg() rather than by incrementing the
+ * pointer, so chained scatterlists (whose entries are not one contiguous
+ * array) are traversed correctly, as dma_map_sg()/dma_unmap_sg() already
+ * do.
+ */
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+                    enum dma_data_direction direction)
+{
+        struct scatterlist *s;
+        int i;
+        for_each_sg(sg, s, nelems, i)
+                dma_sync_for_cpu(sg_virt(s), s->length, direction);
+}
+/*
+ * Sync every entry of a mapped scatterlist for the device.
+ *
+ * The list is walked with for_each_sg() rather than by incrementing the
+ * pointer, so chained scatterlists (whose entries are not one contiguous
+ * array) are traversed correctly, as dma_map_sg()/dma_unmap_sg() already
+ * do.
+ */
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+                       enum dma_data_direction direction)
+{
+        struct scatterlist *s;
+        int i;
+        for_each_sg(sg, s, nelems, i)
+                dma_sync_for_device(sg_virt(s), s->length, direction);
+}
+/*
+ * Report whether a DMA handle returned by a map call denotes a failed
+ * mapping.  This implementation unconditionally returns 0 (no error); the
+ * inline map routines in this header have no failure return of their own.
+ */
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+        return 0;
+}
+/* Every mask is reported as supported. */
+#define dma_supported(dev, mask)        (1)
+/*
+ * Store a new streaming DMA mask for the device.
+ * Returns 0 on success, or -EIO when the device has no dma_mask storage or
+ * the mask is reported as unsupported.
+ */
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+        if (!dev->dma_mask)
+                return -EIO;
+        if (!dma_supported(dev, mask))
+                return -EIO;
+        *dev->dma_mask = mask;
+        return 0;
+}
+/*
+ * Intentionally a no-op.
+ *
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
+ * do any flushing here.
+ */
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+               enum dma_data_direction direction)
+{
+}
+#endif
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644
index 000000000000..8c00dedadc54
--- /dev/null
+++ b/arch/metag/kernel/dma.c
@@ -0,0 +1,507 @@
+/*
+ *  Meta version derived from arch/powerpc/lib/dma-noncoherent.c
+ *    Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *                                              -- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+/*
+ * Page index of address x within the consistent region, i.e. its distance
+ * from CONSISTENT_START in pages.  Used to index into consistent_pte.
+ */
+#define CONSISTENT_OFFSET(x)    (((unsigned long)(x) - CONSISTENT_START) \
+                                        >> PAGE_SHIFT)
+/*
+ * Fetch the coherent DMA mask to use for an allocation.
+ *
+ * With no device, an all-ones mask is returned.  With a device, its
+ * coherent_dma_mask is returned; a zero mask is treated as "unset", warned
+ * about, and reported to the caller as 0.
+ */
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+        u64 mask;
+        if (!dev)
+                return ~0ULL;
+        mask = dev->coherent_dma_mask;
+        /* Sanity check: a usable mask must be non-zero. */
+        if (mask != 0)
+                return mask;
+        dev_warn(dev, "coherent DMA mask is unset\n");
+        return 0;
+}
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ *
+ * It stays NULL until dma_alloc_init() has run; dma_alloc_coherent() refuses
+ * to allocate while it is NULL.
+ */
+static pte_t *consistent_pte;
+/*
+ * Taken (with interrupts disabled) around accesses to the consistent_head
+ * region list in the allocate, free and mmap paths.
+ */
+static DEFINE_SPINLOCK(consistent_lock);
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct metag_vm_region    region;
+ *    unsigned long     flags;
+ *    struct page       **pages;
+ *    unsigned int      nr_pages;
+ *    unsigned long     phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call metag_vm_region_alloc with an appropriate
+ * struct metag_vm_region head (eg):
+ *
+ *  struct metag_vm_region vmalloc_head = {
+ *      .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *      .vm_start       = VMALLOC_START,
+ *      .vm_end         = VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling
+ * metag_vm_region_alloc().
+ */
+/*
+ * One allocated range of virtual addresses inside a region list.  The list
+ * head is itself a struct metag_vm_region whose vm_start/vm_end give the
+ * bounds of the whole area.
+ */
+struct metag_vm_region {
+        /* Links this range into its head's list. */
+        struct list_head vm_list;
+        /* First address of the range. */
+        unsigned long vm_start;
+        /* One past the last address of the range (vm_start + size). */
+        unsigned long vm_end;
+        /* First page backing the range; set by dma_alloc_coherent(). */
+        struct page             *vm_pages;
+        /* 1 while allocated; cleared when dma_free_coherent() starts. */
+        int                     vm_active;
+};
+/*
+ * Head of the list of consistent-region allocations.  Its vm_start/vm_end
+ * bound the area that metag_vm_region_alloc() hands out addresses from.
+ */
+static struct metag_vm_region consistent_head = {
+        .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+        .vm_start = CONSISTENT_START,
+        .vm_end = CONSISTENT_END,
+};
+/*
+ * Reserve a range of 'size' bytes of virtual address space inside the area
+ * described by 'head'.
+ *
+ * A new descriptor is allocated with kmalloc(gfp), then the existing ranges
+ * are scanned in list order for the first gap that fits, and the descriptor
+ * is linked in front of the range that follows the gap.
+ *
+ * Returns the new descriptor (vm_start/vm_end/vm_active filled in; vm_pages
+ * is left for the caller to set), or NULL if the descriptor could not be
+ * allocated or no gap is large enough.
+ */
+static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
+                                                     *head, size_t size,
+                                                     gfp_t gfp)
+{
+        /* 'end' is the highest start address at which 'size' bytes fit. */
+        unsigned long addr = head->vm_start, end = head->vm_end - size;
+        unsigned long flags;
+        struct metag_vm_region *c, *new;
+        /* Allocate the descriptor before taking the lock. */
+        new = kmalloc(sizeof(struct metag_vm_region), gfp);
+        if (!new)
+                goto out;
+        spin_lock_irqsave(&consistent_lock, flags);
+        list_for_each_entry(c, &head->vm_list, vm_list) {
+                /* Candidate range would wrap around the address space. */
+                if ((addr + size) < addr)
+                        goto nospc;
+                /* The gap in front of 'c' is large enough. */
+                if ((addr + size) <= c->vm_start)
+                        goto found;
+                /* Otherwise try again just past 'c'. */
+                addr = c->vm_end;
+                if (addr > end)
+                        goto nospc;
+        }
+        /*
+         * Falling out of the loop means no later range is in the way; with
+         * list_for_each_entry() 'c' then designates the list head, so the
+         * insertion below appends at the tail.
+         */
+found:
+        /*
+         * Insert this entry _before_ the one we found.
+         */
+        list_add_tail(&new->vm_list, &c->vm_list);
+        new->vm_start = addr;
+        new->vm_end = addr + size;
+        new->vm_active = 1;
+        spin_unlock_irqrestore(&consistent_lock, flags);
+        return new;
+nospc:
+        spin_unlock_irqrestore(&consistent_lock, flags);
+        kfree(new);
+out:
+        return NULL;
+}
+/*
+ * Look up the active range in 'head' that starts exactly at 'addr'.
+ * Returns the matching descriptor, or NULL when there is none.  The list is
+ * walked without any locking here.
+ */
+static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
+                                                    *head, unsigned long addr)
+{
+        struct metag_vm_region *region;
+        list_for_each_entry(region, &head->vm_list, vm_list) {
+                if (!region->vm_active)
+                        continue;
+                if (region->vm_start == addr)
+                        return region;
+        }
+        return NULL;
+}
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ *
+ * @dev:    device the memory is for (may be NULL, see
+ *          get_coherent_dma_mask())
+ * @size:   number of bytes wanted; rounded up to whole pages
+ * @handle: receives the bus address of the first page on success
+ * @gfp:    allocation flags; GFP_DMA is added when the coherent mask is not
+ *          exactly 0xffffffff
+ *
+ * The pages are zeroed and flushed from the data cache through their normal
+ * kernel mapping, then mapped write-combined at a freshly reserved address
+ * in the consistent region.
+ *
+ * Returns the virtual address in the consistent region, or NULL if the
+ * allocator is not initialised, the mask is unset, the request is too big,
+ * or pages / address space could not be obtained.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                         dma_addr_t *handle, gfp_t gfp)
+{
+        struct page *page;
+        struct metag_vm_region *c;
+        unsigned long order;
+        u64 mask = get_coherent_dma_mask(dev);
+        u64 limit;
+        if (!consistent_pte) {
+                pr_err("%s: not initialised\n", __func__);
+                dump_stack();
+                return NULL;
+        }
+        if (!mask)
+                goto no_page;
+        size = PAGE_ALIGN(size);
+        /*
+         * Isolate the lowest clear bit of the mask (0 if the mask is all
+         * ones): a request of that size or more cannot fit below it.
+         */
+        limit = (mask + 1) & ~mask;
+        if ((limit && size >= limit)
+            || size >= (CONSISTENT_END - CONSISTENT_START)) {
+                /* size is a size_t, so it needs the 'z' length modifier. */
+                pr_warn("coherent allocation too big (requested %#zx mask %#Lx)\n",
+                        size, mask);
+                return NULL;
+        }
+        order = get_order(size);
+        if (mask != 0xffffffff)
+                gfp |= GFP_DMA;
+        page = alloc_pages(gfp, order);
+        if (!page)
+                goto no_page;
+        /*
+         * Invalidate any data that might be lurking in the
+         * kernel direct-mapped region for device DMA.
+         */
+        {
+                void *kaddr = page_address(page);
+                memset(kaddr, 0, size);
+                flush_dcache_region(kaddr, size);
+        }
+        /*
+         * Allocate a virtual address in the consistent mapping region.
+         */
+        c = metag_vm_region_alloc(&consistent_head, size,
+                                  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+        if (c) {
+                unsigned long vaddr = c->vm_start;
+                pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+                struct page *end = page + (1 << order);
+                c->vm_pages = page;
+                /* Make each page of the block individually freeable. */
+                split_page(page, order);
+                /*
+                 * Set the "dma handle"
+                 */
+                *handle = page_to_bus(page);
+                /* Map one page per iteration until size is used up. */
+                do {
+                        BUG_ON(!pte_none(*pte));
+                        SetPageReserved(page);
+                        set_pte_at(&init_mm, vaddr,
+                                   pte, mk_pte(page,
+                                               pgprot_writecombine
+                                               (PAGE_KERNEL)));
+                        page++;
+                        pte++;
+                        vaddr += PAGE_SIZE;
+                } while (size -= PAGE_SIZE);
+                /*
+                 * Free the otherwise unused pages.
+                 */
+                while (page < end) {
+                        __free_page(page);
+                        page++;
+                }
+                return (void *)c->vm_start;
+        }
+        /* No address space: page is known to be non-NULL here. */
+        __free_pages(page, order);
+no_page:
+        return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+/*
+ * Free a mapping created by dma_alloc_coherent().
+ *
+ * @dev:        unused here
+ * @size:       size originally requested; rounded up to whole pages
+ * @vaddr:      virtual address returned by dma_alloc_coherent()
+ * @dma_handle: unused here
+ *
+ * The region is looked up by its start address.  If the size does not match
+ * the recorded region an error is logged and the recorded size is used.
+ * Each page is unmapped from the consistent page table, un-reserved and
+ * freed, then the kernel TLB range is flushed and the descriptor released.
+ * An unknown address is logged with a stack dump and otherwise ignored.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+                       void *vaddr, dma_addr_t dma_handle)
+{
+        struct metag_vm_region *c;
+        unsigned long flags, addr;
+        pte_t *ptep;
+        size = PAGE_ALIGN(size);
+        spin_lock_irqsave(&consistent_lock, flags);
+        c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
+        if (!c)
+                goto no_area;
+        /* Hide the region from further lookups while it is torn down. */
+        c->vm_active = 0;
+        if ((c->vm_end - c->vm_start) != size) {
+                /* unsigned long and size_t need %lu and %zu respectively. */
+                pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
+                       __func__, c->vm_end - c->vm_start, size);
+                dump_stack();
+                size = c->vm_end - c->vm_start;
+        }
+        ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+        addr = c->vm_start;
+        do {
+                pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+                unsigned long pfn;
+                ptep++;
+                addr += PAGE_SIZE;
+                if (!pte_none(pte) && pte_present(pte)) {
+                        pfn = pte_pfn(pte);
+                        if (pfn_valid(pfn)) {
+                                struct page *page = pfn_to_page(pfn);
+                                ClearPageReserved(page);
+                                __free_page(page);
+                                /* Good page: skip the complaint below. */
+                                continue;
+                        }
+                }
+                pr_crit("%s: bad page in kernel page table\n",
+                        __func__);
+        } while (size -= PAGE_SIZE);
+        flush_tlb_kernel_range(c->vm_start, c->vm_end);
+        list_del(&c->vm_list);
+        spin_unlock_irqrestore(&consistent_lock, flags);
+        kfree(c);
+        return;
+no_area:
+        spin_unlock_irqrestore(&consistent_lock, flags);
+        pr_err("%s: trying to free invalid coherent area: %p\n",
+               __func__, vaddr);
+        dump_stack();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+/*
+ * Common worker for the dma_mmap_*() entry points: remap the pages behind
+ * a coherent allocation into the user range described by vma.
+ *
+ * The allocation is found by its kernel address cpu_addr.  The mapping is
+ * only made if the vma's page offset and length fall entirely inside the
+ * allocation.  Returns the remap_pfn_range() result, or -ENXIO when the
+ * allocation is unknown or the requested window does not fit.  The dev,
+ * dma_addr and size arguments are not used.
+ */
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+        struct metag_vm_region *region;
+        unsigned long irq_flags, user_pages, region_pages, first_page;
+        user_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+        spin_lock_irqsave(&consistent_lock, irq_flags);
+        region = metag_vm_region_find(&consistent_head,
+                                      (unsigned long)cpu_addr);
+        spin_unlock_irqrestore(&consistent_lock, irq_flags);
+        if (!region)
+                return -ENXIO;
+        first_page = vma->vm_pgoff;
+        region_pages = (region->vm_end - region->vm_start) >> PAGE_SHIFT;
+        /* Reject windows that start or end outside the allocation. */
+        if (first_page >= region_pages)
+                return -ENXIO;
+        if (user_pages > (region_pages - first_page))
+                return -ENXIO;
+        return remap_pfn_range(vma, vma->vm_start,
+                               page_to_pfn(region->vm_pages) + first_page,
+                               user_pages << PAGE_SHIFT,
+                               vma->vm_page_prot);
+}
+/*
+ * Map a coherent allocation into user space with the vma's page protection
+ * switched to non-cached.  Returns whatever dma_mmap() returns.
+ */
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+                      void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+        pgprot_t prot = pgprot_noncached(vma->vm_page_prot);
+        vma->vm_page_prot = prot;
+        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+/*
+ * Map a coherent allocation into user space with the vma's page protection
+ * switched to write-combining.  Returns whatever dma_mmap() returns.
+ */
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+                          void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+        pgprot_t prot = pgprot_writecombine(vma->vm_page_prot);
+        vma->vm_page_prot = prot;
+        return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+/*
+ * Initialise the consistent memory allocation.
+ *
+ * Runs as an early initcall.  Allocates the page-table levels in init_mm
+ * that cover CONSISTENT_START, copies the resulting pmd entry into the page
+ * directory obtained from mmu_get_base(), and records the pte table in
+ * consistent_pte.  Returns 0 on success or -ENOMEM if a table could not be
+ * allocated (consistent_pte then stays NULL).
+ */
+static int __init dma_alloc_init(void)
+{
+        pgd_t *pgd, *pgd_k;
+        pud_t *pud, *pud_k;
+        pmd_t *pmd, *pmd_k;
+        pte_t *pte;
+        int ret = 0;
+        /* Single-pass loop so that 'break' serves as the error exit. */
+        do {
+                int offset = pgd_index(CONSISTENT_START);
+                pgd = pgd_offset(&init_mm, CONSISTENT_START);
+                pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
+                pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
+                if (!pmd) {
+                        pr_err("%s: no pmd tables\n", __func__);
+                        ret = -ENOMEM;
+                        break;
+                }
+                /* The slot is expected to be empty before this point. */
+                WARN_ON(!pmd_none(*pmd));
+                pte = pte_alloc_kernel(pmd, CONSISTENT_START);
+                if (!pte) {
+                        pr_err("%s: no pte tables\n", __func__);
+                        ret = -ENOMEM;
+                        break;
+                }
+                /*
+                 * Mirror the new pmd entry into the directory returned by
+                 * mmu_get_base().
+                 * NOTE(review): presumably the page directory currently in
+                 * use by the MMU - confirm against mmu_get_base().
+                 */
+                pgd_k = ((pgd_t *) mmu_get_base()) + offset;
+                pud_k = pud_offset(pgd_k, CONSISTENT_START);
+                pmd_k = pmd_offset(pud_k, CONSISTENT_START);
+                set_pmd(pmd_k, *pmd);
+                /* From here on dma_alloc_coherent() may be used. */
+                consistent_pte = pte;
+        } while (0);
+        return ret;
+}
+early_initcall(dma_alloc_init);
+/*
+ * Make a buffer consistent for the device before it starts DMA.
+ *
+ * The cache operation depends on the direction:
+ *   DMA_BIDIRECTIONAL - flush (write back and invalidate)
+ *   DMA_TO_DEVICE     - write back only
+ *   DMA_FROM_DEVICE   - invalidate only
+ *   DMA_NONE          - BUG()
+ * Any other value performs no cache operation.  A write barrier is issued
+ * at the end in every case.
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
+{
+        /*
+         * Ensure any writes get through the write combiner. This is needed
+         * for DMA_FROM_DEVICE too: otherwise a late write could dirty the
+         * cache after the invalidate and be written back during the DMA.
+         */
+        barrier();
+        if (dma_direction == DMA_BIDIRECTIONAL) {
+                /*
+                 * Write back so the device sees our latest changes and no
+                 * dirty lines remain, and invalidate in preparation for
+                 * getting the buffer back (dma_sync_for_cpu) later.
+                 */
+                flush_dcache_region(vaddr, size);
+        } else if (dma_direction == DMA_TO_DEVICE) {
+                /*
+                 * Write back so the device sees our latest changes. The
+                 * device shouldn't write to the buffer, so no invalidate.
+                 */
+                writeback_dcache_region(vaddr, size);
+        } else if (dma_direction == DMA_FROM_DEVICE) {
+                /*
+                 * Invalidate so no dirty line can be written back during
+                 * the DMA. A flush (writeback) would also be safe here.
+                 */
+                invalidate_dcache_region(vaddr, size);
+        } else if (dma_direction == DMA_NONE) {
+                BUG();
+        }
+        wmb();
+}
+EXPORT_SYMBOL(dma_sync_for_device);
+/*
+ * Make a buffer consistent for the CPU after the device has finished DMA.
+ *
+ * Cache maintenance is only performed when L2 cache prefetch is enabled;
+ * a read barrier is issued in every case.
+ */
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
+{
+        /*
+         * Hardware L2 cache prefetch doesn't cross 4K physical boundaries,
+         * but per Documentation/DMA-API-HOWTO.txt kmalloc'd memory is
+         * DMA'able, so accesses to nearby memory could trigger a cache fill
+         * inside the DMA buffer.
+         *
+         * That should never create dirty lines, so a flush or invalidate is
+         * safe and lets us see the data written by the device.
+         */
+        if (_meta_l2c_pf_is_enabled()) {
+                if (dma_direction == DMA_BIDIRECTIONAL ||
+                    dma_direction == DMA_FROM_DEVICE) {
+                        invalidate_dcache_region(vaddr, size);
+                } else if (dma_direction == DMA_NONE) {
+                        BUG();
+                }
+                /*
+                 * DMA_TO_DEVICE: the device shouldn't have written to the
+                 * buffer, so there is nothing to do.
+                 */
+        }
+        rmb();
+}
+EXPORT_SYMBOL(dma_sync_for_cpu);
author	James Hogan <james.hogan@imgtec.com>	2012-10-05 11:27:03 -0400
committer	James Hogan <james.hogan@imgtec.com>	2013-03-02 15:09:51 -0500
commit	f507758ccbed5c354cc1ce3b8f53ea072d7bc222 (patch)
tree	dd474b63b194039b5c6c97790016f55a02a93643 /arch/metag
parent	42682c6c42a5765b2c7cccfca170368fef6191ef (diff)

diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..b5f80a62fe8b
--- /dev/null
+++ b/arch/metag/include/asm/dma-mapping.h
@@ -0,0 +1,183 @@
	1	#ifndef _ASM_METAG_DMA_MAPPING_H
	2	#define _ASM_METAG_DMA_MAPPING_H
	3
	4	#include <linux/mm.h>
	5
	6	#include <asm/cache.h>
	7	#include <asm/io.h>
	8	#include <linux/scatterlist.h>
	9	#include <asm/bug.h>
	10
	11	#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
	12	#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
	13
	14	void *dma_alloc_coherent(struct device *dev, size_t size,
	15	dma_addr_t *dma_handle, gfp_t flag);
	16
	17	void dma_free_coherent(struct device *dev, size_t size,
	18	void *vaddr, dma_addr_t dma_handle);
	19
	20	void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
	21	void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
	22
	23	int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
	24	void *cpu_addr, dma_addr_t dma_addr, size_t size);
	25
	26	int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
	27	void *cpu_addr, dma_addr_t dma_addr, size_t size);
	28
	29	static inline dma_addr_t
	30	dma_map_single(struct device *dev, void *ptr, size_t size,
	31	enum dma_data_direction direction)
	32	{
	33	BUG_ON(!valid_dma_direction(direction));
	34	WARN_ON(size == 0);
	35	dma_sync_for_device(ptr, size, direction);
	36	return virt_to_phys(ptr);
	37	}
	38
	39	static inline void
	40	dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
	41	enum dma_data_direction direction)
	42	{
	43	BUG_ON(!valid_dma_direction(direction));
	44	dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction);
	45	}
	46
	47	static inline int
	48	dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
	49	enum dma_data_direction direction)
	50	{
	51	struct scatterlist *sg;
	52	int i;
	53
	54	BUG_ON(!valid_dma_direction(direction));
	55	WARN_ON(nents == 0 || sglist[0].length == 0);
	56
	57	for_each_sg(sglist, sg, nents, i) {
	58	BUG_ON(!sg_page(sg));
	59
	60	sg->dma_address = sg_phys(sg);
	61	dma_sync_for_device(sg_virt(sg), sg->length, direction);
	62	}
	63
	64	return nents;
	65	}
	66
	67	static inline dma_addr_t
	68	dma_map_page(struct device *dev, struct page *page, unsigned long offset,
	69	size_t size, enum dma_data_direction direction)
	70	{
	71	BUG_ON(!valid_dma_direction(direction));
	72	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
	73	direction);
	74	return page_to_phys(page) + offset;
	75	}
	76
	77	static inline void
	78	dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
	79	enum dma_data_direction direction)
	80	{
	81	BUG_ON(!valid_dma_direction(direction));
	82	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
	83	}
	84
	85
	86	static inline void
	87	dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
	88	enum dma_data_direction direction)
	89	{
	90	struct scatterlist *sg;
	91	int i;
	92
	93	BUG_ON(!valid_dma_direction(direction));
	94	WARN_ON(nhwentries == 0 || sglist[0].length == 0);
	95
	96	for_each_sg(sglist, sg, nhwentries, i) {
	97	BUG_ON(!sg_page(sg));
	98
	99	sg->dma_address = sg_phys(sg);
	100	dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
	101	}
	102	}
	103
	104	static inline void
	105	dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
	106	enum dma_data_direction direction)
	107	{
	108	dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
	109	}
	110
	111	static inline void
	112	dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
	113	size_t size, enum dma_data_direction direction)
	114	{
	115	dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
	116	}
	117
	118	static inline void
	119	dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
	120	unsigned long offset, size_t size,
	121	enum dma_data_direction direction)
	122	{
	123	dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size,
	124	direction);
	125	}
	126
	127	static inline void
	128	dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
	129	unsigned long offset, size_t size,
	130	enum dma_data_direction direction)
	131	{
	132	dma_sync_for_device(phys_to_virt(dma_handle)+offset, size,
	133	direction);
	134	}
	135
	136	static inline void
	137	dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
	138	enum dma_data_direction direction)
	139	{
	140	int i;
	141	for (i = 0; i < nelems; i++, sg++)
	142	dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
	143	}
	144
	145	static inline void
	146	dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
	147	enum dma_data_direction direction)
	148	{
	149	int i;
	150	for (i = 0; i < nelems; i++, sg++)
	151	dma_sync_for_device(sg_virt(sg), sg->length, direction);
	152	}
	153
	154	static inline int
	155	dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
	156	{
	157	return 0;
	158	}
	159
	160	#define dma_supported(dev, mask) (1)
	161
	162	static inline int
	163	dma_set_mask(struct device *dev, u64 mask)
	164	{
	165	if (!dev->dma_mask || !dma_supported(dev, mask))
	166	return -EIO;
	167
	168	*dev->dma_mask = mask;
	169
	170	return 0;
	171	}
	172
	173	/*
	174	* dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
	175	* do any flushing here.
	176	*/
	177	static inline void
	178	dma_cache_sync(struct device *dev, void *vaddr, size_t size,
	179	enum dma_data_direction direction)
	180	{
	181	}
	182
	183	#endif


diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644
index 000000000000..8c00dedadc54
--- /dev/null
+++ b/arch/metag/kernel/dma.c
@@ -0,0 +1,507 @@
	1	/*
	2	* Meta version derived from arch/powerpc/lib/dma-noncoherent.c
	3	* Copyright (C) 2008 Imagination Technologies Ltd.
	4	*
	5	* PowerPC version derived from arch/arm/mm/consistent.c
	6	* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
	7	*
	8	* Copyright (C) 2000 Russell King
	9	*
	10	* Consistent memory allocators. Used for DMA devices that want to
	11	* share uncached memory with the processor core. The function return
	12	* is the virtual address and 'dma_handle' is the physical address.
	13	* Mostly stolen from the ARM port, with some changes for PowerPC.
	14	* -- Dan
	15	*
	16	* Reorganized to get rid of the arch-specific consistent_* functions
	17	* and provide non-coherent implementations for the DMA API. -Matt
	18	*
	19	* Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
	20	* implementation. This is pulled straight from ARM and barely
	21	* modified. -Matt
	22	*
	23	* This program is free software; you can redistribute it and/or modify
	24	* it under the terms of the GNU General Public License version 2 as
	25	* published by the Free Software Foundation.
	26	*/
	27
	28	#include <linux/sched.h>
	29	#include <linux/kernel.h>
	30	#include <linux/errno.h>
	31	#include <linux/export.h>
	32	#include <linux/string.h>
	33	#include <linux/types.h>
	34	#include <linux/highmem.h>
	35	#include <linux/dma-mapping.h>
	36	#include <linux/slab.h>
	37
	38	#include <asm/tlbflush.h>
	39	#include <asm/mmu.h>
	40
	41	#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_START) \
	42	>> PAGE_SHIFT)
	43
	44	static u64 get_coherent_dma_mask(struct device *dev)
	45	{
	46	u64 mask = ~0ULL;
	47
	48	if (dev) {
	49	mask = dev->coherent_dma_mask;
	50
	51	/*
	52	* Sanity check the DMA mask - it must be non-zero, and
	53	* must be able to be satisfied by a DMA allocation.
	54	*/
	55	if (mask == 0) {
	56	dev_warn(dev, "coherent DMA mask is unset\n");
	57	return 0;
	58	}
	59	}
	60
	61	return mask;
	62	}
	63	/*
	64	* This is the page table (2MB) covering uncached, DMA consistent allocations
	65	*/
	66	static pte_t *consistent_pte;
	67	static DEFINE_SPINLOCK(consistent_lock);
	68
	69	/*
	70	* VM region handling support.
	71	*
	72	* This should become something generic, handling VM region allocations for
	73	* vmalloc and similar (ioremap, module space, etc).
	74	*
	75	* I envisage vmalloc()'s supporting vm_struct becoming:
	76	*
	77	* struct vm_struct {
	78	* struct metag_vm_region region;
	79	* unsigned long flags;
	80	* struct page **pages;
	81	* unsigned int nr_pages;
	82	* unsigned long phys_addr;
	83	* };
	84	*
	85	* get_vm_area() would then call metag_vm_region_alloc with an appropriate
	86	* struct metag_vm_region head (eg):
	87	*
	88	* struct metag_vm_region vmalloc_head = {
	89	* .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
	90	* .vm_start = VMALLOC_START,
	91	* .vm_end = VMALLOC_END,
	92	* };
	93	*
	94	* However, vmalloc_head.vm_start is variable (typically, it is dependent on
	95	* the amount of RAM found at boot time.) I would imagine that get_vm_area()
	96	* would have to initialise this each time prior to calling
	97	* metag_vm_region_alloc().
	98	*/
	99	struct metag_vm_region {
	100	struct list_head vm_list;
	101	unsigned long vm_start;
	102	unsigned long vm_end;
	103	struct page *vm_pages;
	104	int vm_active;
	105	};
	106
	107	static struct metag_vm_region consistent_head = {
	108	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
	109	.vm_start = CONSISTENT_START,
	110	.vm_end = CONSISTENT_END,
	111	};
	112
	113	static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
	114	*head, size_t size,
	115	gfp_t gfp)
	116	{
	117	unsigned long addr = head->vm_start, end = head->vm_end - size;
	118	unsigned long flags;
	119	struct metag_vm_region c, new;
	120
	121	new = kmalloc(sizeof(struct metag_vm_region), gfp);
	122	if (!new)
	123	goto out;
	124
	125	spin_lock_irqsave(&consistent_lock, flags);
	126
	127	list_for_each_entry(c, &head->vm_list, vm_list) {
	128	if ((addr + size) < addr)
	129	goto nospc;
	130	if ((addr + size) <= c->vm_start)
	131	goto found;
	132	addr = c->vm_end;
	133	if (addr > end)
	134	goto nospc;
	135	}
	136
	137	found:
	138	/*
	139	* Insert this entry _before_ the one we found.
	140	*/
	141	list_add_tail(&new->vm_list, &c->vm_list);
	142	new->vm_start = addr;
	143	new->vm_end = addr + size;
	144	new->vm_active = 1;
	145
	146	spin_unlock_irqrestore(&consistent_lock, flags);
	147	return new;
	148
	149	nospc:
	150	spin_unlock_irqrestore(&consistent_lock, flags);
	151	kfree(new);
	152	out:
	153	return NULL;
	154	}
	155
	156	static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
	157	*head, unsigned long addr)
	158	{
	159	struct metag_vm_region *c;
	160
	161	list_for_each_entry(c, &head->vm_list, vm_list) {
	162	if (c->vm_active && c->vm_start == addr)
	163	goto out;
	164	}
	165	c = NULL;
	166	out:
	167	return c;
	168	}
	169
	170	/*
	171	* Allocate DMA-coherent memory space and return both the kernel remapped
	172	* virtual and bus address for that space.
	173	*/
	174	void dma_alloc_coherent(struct device dev, size_t size,
	175	dma_addr_t *handle, gfp_t gfp)
	176	{
	177	struct page *page;
	178	struct metag_vm_region *c;
	179	unsigned long order;
	180	u64 mask = get_coherent_dma_mask(dev);
	181	u64 limit;
	182
	183	if (!consistent_pte) {
	184	pr_err("%s: not initialised\n", __func__);
	185	dump_stack();
	186	return NULL;
	187	}
	188
	189	if (!mask)
	190	goto no_page;
	191	size = PAGE_ALIGN(size);
	192	limit = (mask + 1) & ~mask;
	193	if ((limit && size >= limit)
	194	\|\| size >= (CONSISTENT_END - CONSISTENT_START)) {
	195	pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n",
	196	size, mask);
	197	return NULL;
	198	}
	199
	200	order = get_order(size);
	201
	202	if (mask != 0xffffffff)
	203	gfp \|= GFP_DMA;
	204
	205	page = alloc_pages(gfp, order);
	206	if (!page)
	207	goto no_page;
	208
	209	/*
	210	* Invalidate any data that might be lurking in the
	211	* kernel direct-mapped region for device DMA.
	212	*/
	213	{
	214	void *kaddr = page_address(page);
	215	memset(kaddr, 0, size);
	216	flush_dcache_region(kaddr, size);
	217	}
	218
	219	/*
	220	* Allocate a virtual address in the consistent mapping region.
	221	*/
	222	c = metag_vm_region_alloc(&consistent_head, size,
	223	gfp & ~(__GFP_DMA \| __GFP_HIGHMEM));
	224	if (c) {
	225	unsigned long vaddr = c->vm_start;
	226	pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
	227	struct page *end = page + (1 << order);
	228
	229	c->vm_pages = page;
	230	split_page(page, order);
	231
	232	/*
	233	* Set the "dma handle"
	234	*/
	235	*handle = page_to_bus(page);
	236
	237	do {
	238	BUG_ON(!pte_none(*pte));
	239
	240	SetPageReserved(page);
	241	set_pte_at(&init_mm, vaddr,
	242	pte, mk_pte(page,
	243	pgprot_writecombine
	244	(PAGE_KERNEL)));
	245	page++;
	246	pte++;
	247	vaddr += PAGE_SIZE;
	248	} while (size -= PAGE_SIZE);
	249
	250	/*
	251	* Free the otherwise unused pages.
	252	*/
	253	while (page < end) {
	254	__free_page(page);
	255	page++;
	256	}
	257
	258	return (void *)c->vm_start;
	259	}
	260
	261	if (page)
	262	__free_pages(page, order);
	263	no_page:
	264	return NULL;
	265	}
	266	EXPORT_SYMBOL(dma_alloc_coherent);
	267
	268	/*
	269	* free a page as defined by the above mapping.
	270	*/
	271	void dma_free_coherent(struct device *dev, size_t size,
	272	void *vaddr, dma_addr_t dma_handle)
	273	{
	274	struct metag_vm_region *c;
	275	unsigned long flags, addr;
	276	pte_t *ptep;
	277
	278	size = PAGE_ALIGN(size);
	279
	280	spin_lock_irqsave(&consistent_lock, flags);
	281
	282	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
	283	if (!c)
	284	goto no_area;
	285
	286	c->vm_active = 0;
	287	if ((c->vm_end - c->vm_start) != size) {
	288	pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
	289	__func__, c->vm_end - c->vm_start, size);
	290	dump_stack();
	291	size = c->vm_end - c->vm_start;
	292	}
	293
	294	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	295	addr = c->vm_start;
	296	do {
	297	pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
	298	unsigned long pfn;
	299
	300	ptep++;
	301	addr += PAGE_SIZE;
	302
	303	if (!pte_none(pte) && pte_present(pte)) {
	304	pfn = pte_pfn(pte);
	305
	306	if (pfn_valid(pfn)) {
	307	struct page *page = pfn_to_page(pfn);
	308	ClearPageReserved(page);
	309
	310	__free_page(page);
	311	continue;
	312	}
	313	}
	314
	315	pr_crit("%s: bad page in kernel page table\n",
	316	__func__);
	317	} while (size -= PAGE_SIZE);
	318
	319	flush_tlb_kernel_range(c->vm_start, c->vm_end);
	320
	321	list_del(&c->vm_list);
	322
	323	spin_unlock_irqrestore(&consistent_lock, flags);
	324
	325	kfree(c);
	326	return;
	327
	328	no_area:
	329	spin_unlock_irqrestore(&consistent_lock, flags);
	330	pr_err("%s: trying to free invalid coherent area: %p\n",
	331	__func__, vaddr);
	332	dump_stack();
	333	}
	334	EXPORT_SYMBOL(dma_free_coherent);
	335
	336
	337	static int dma_mmap(struct device dev, struct vm_area_struct vma,
	338	void *cpu_addr, dma_addr_t dma_addr, size_t size)
	339	{
	340	int ret = -ENXIO;
	341
	342	unsigned long flags, user_size, kern_size;
	343	struct metag_vm_region *c;
	344
	345	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	346
	347	spin_lock_irqsave(&consistent_lock, flags);
	348	c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	349	spin_unlock_irqrestore(&consistent_lock, flags);
	350
	351	if (c) {
	352	unsigned long off = vma->vm_pgoff;
	353
	354	kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
	355
	356	if (off < kern_size &&
	357	user_size <= (kern_size - off)) {
	358	ret = remap_pfn_range(vma, vma->vm_start,
	359	page_to_pfn(c->vm_pages) + off,
	360	user_size << PAGE_SHIFT,
	361	vma->vm_page_prot);
	362	}
	363	}
	364
	365
	366	return ret;
	367	}
	368
	369	int dma_mmap_coherent(struct device dev, struct vm_area_struct vma,
	370	void *cpu_addr, dma_addr_t dma_addr, size_t size)
	371	{
	372	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	373	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
	374	}
	375	EXPORT_SYMBOL(dma_mmap_coherent);
	376
	377	int dma_mmap_writecombine(struct device dev, struct vm_area_struct vma,
	378	void *cpu_addr, dma_addr_t dma_addr, size_t size)
	379	{
	380	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	381	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
	382	}
	383	EXPORT_SYMBOL(dma_mmap_writecombine);
	384
	385
	386
	387
	388	/*
	389	* Initialise the consistent memory allocation.
	390	*/
	391	static int __init dma_alloc_init(void)
	392	{
	393	pgd_t pgd, pgd_k;
	394	pud_t pud, pud_k;
	395	pmd_t pmd, pmd_k;
	396	pte_t *pte;
	397	int ret = 0;
	398
	399	do {
	400	int offset = pgd_index(CONSISTENT_START);
	401	pgd = pgd_offset(&init_mm, CONSISTENT_START);
	402	pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
	403	pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
	404	if (!pmd) {
	405	pr_err("%s: no pmd tables\n", __func__);
	406	ret = -ENOMEM;
	407	break;
	408	}
	409	WARN_ON(!pmd_none(*pmd));
	410
	411	pte = pte_alloc_kernel(pmd, CONSISTENT_START);
	412	if (!pte) {
	413	pr_err("%s: no pte tables\n", __func__);
	414	ret = -ENOMEM;
	415	break;
	416	}
	417
	418	pgd_k = ((pgd_t *) mmu_get_base()) + offset;
	419	pud_k = pud_offset(pgd_k, CONSISTENT_START);
	420	pmd_k = pmd_offset(pud_k, CONSISTENT_START);
	421	set_pmd(pmd_k, *pmd);
	422
	423	consistent_pte = pte;
	424	} while (0);
	425
	426	return ret;
	427	}
	428	early_initcall(dma_alloc_init);
	429
	430	/*
	431	* make an area consistent to devices.
	432	*/
	433	void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
	434	{
	435	/*
	436	* Ensure any writes get through the write combiner. This is necessary
	437	* even with DMA_FROM_DEVICE, or the write may dirty the cache after
	438	* we've invalidated it and get written back during the DMA.
	439	*/
	440
	441	barrier();
	442
	443	switch (dma_direction) {
	444	case DMA_BIDIRECTIONAL:
	445	/*
	446	* Writeback to ensure the device can see our latest changes and
	447	* so that we have no dirty lines, and invalidate the cache
	448	* lines too in preparation for receiving the buffer back
	449	* (dma_sync_for_cpu) later.
	450	*/
	451	flush_dcache_region(vaddr, size);
	452	break;
	453	case DMA_TO_DEVICE:
	454	/*
	455	* Writeback to ensure the device can see our latest changes.
	456	* There's no need to invalidate as the device shouldn't write
	457	* to the buffer.
	458	*/
	459	writeback_dcache_region(vaddr, size);
	460	break;
	461	case DMA_FROM_DEVICE:
	462	/*
	463	* Invalidate to ensure we have no dirty lines that could get
	464	* written back during the DMA. It's also safe to flush
	465	* (writeback) here if necessary.
	466	*/
	467	invalidate_dcache_region(vaddr, size);
	468	break;
	469	case DMA_NONE:
	470	BUG();
	471	}
	472
	473	wmb();
	474	}
	475	EXPORT_SYMBOL(dma_sync_for_device);
	476
	477	/*
	478	* make an area consistent to the core.
	479	*/
	480	void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
	481	{
	482	/*
	483	* Hardware L2 cache prefetch doesn't occur across 4K physical
	484	* boundaries, however according to Documentation/DMA-API-HOWTO.txt
	485	* kmalloc'd memory is DMA'able, so accesses in nearby memory could
	486	* trigger a cache fill in the DMA buffer.
	487	*
	488	* This should never cause dirty lines, so a flush or invalidate should
	489	* be safe to allow us to see data from the device.
	490	*/
	491	if (_meta_l2c_pf_is_enabled()) {
	492	switch (dma_direction) {
	493	case DMA_BIDIRECTIONAL:
	494	case DMA_FROM_DEVICE:
	495	invalidate_dcache_region(vaddr, size);
	496	break;
	497	case DMA_TO_DEVICE:
	498	/* The device shouldn't have written to the buffer */
	499	break;
	500	case DMA_NONE:
	501	BUG();
	502	}
	503	}
	504
	505	rmb();
	506	}
	507	EXPORT_SYMBOL(dma_sync_for_cpu);