author     Linus Torvalds <torvalds@linux-foundation.org>   2012-07-30 13:11:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-07-30 13:11:31 -0400
commit     6f51f51582e793ea13e7de7ed6b138f71c51784b (patch)
tree       211ecbf88cdf2f183e23da3f8f23153ac6133410 /arch/arm/mm
parent     76159c20c0bcf5b38178fbfb61049eeb6380bb54 (diff)
parent     97ef952a20853fad72087a53fa556fbec45edd8f (diff)
Merge branch 'for-linus-for-3.6-rc1' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
Pull DMA-mapping updates from Marek Szyprowski:
 "Those patches are a continuation of my earlier work.  They contain
  extensions to the DMA-mapping framework to remove limitations of the
  current ARM implementation (like the limited total size of DMA
  coherent/write combine buffers), improve performance of buffer sharing
  between devices (attributes to skip CPU cache operations or creation of
  an additional kernel mapping for some specific use cases) as well as
  some unification of the common code for the dma_mmap_attrs() and
  dma_mmap_coherent() functions.  All extensions have been implemented
  and tested for the ARM architecture."

* 'for-linus-for-3.6-rc1' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping:
  ARM: dma-mapping: add support for DMA_ATTR_SKIP_CPU_SYNC attribute
  common: DMA-mapping: add DMA_ATTR_SKIP_CPU_SYNC attribute
  ARM: dma-mapping: add support for dma_get_sgtable()
  common: dma-mapping: introduce dma_get_sgtable() function
  ARM: dma-mapping: add support for DMA_ATTR_NO_KERNEL_MAPPING attribute
  common: DMA-mapping: add DMA_ATTR_NO_KERNEL_MAPPING attribute
  common: dma-mapping: add support for generic dma_mmap_* calls
  ARM: dma-mapping: fix error path for memory allocation failure
  ARM: dma-mapping: add more sanity checks in arm_dma_mmap()
  ARM: dma-mapping: remove custom consistent dma region
  mm: vmalloc: use const void * for caller argument
  scatterlist: add sg_alloc_table_from_pages function
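As a rough illustration of how the new attributes are meant to be consumed, the sketch below shows a driver-side allocation against a 3.6-era kernel. It is an assumption for illustration only, not code from this merge; example_alloc() and its parameters are made-up names, and only DMA_ATTR_NO_KERNEL_MAPPING is exercised here.

/*
 * Hedged sketch (not part of this merge): allocate a buffer that only the
 * device ever touches, so no kernel virtual mapping is required.
 */
#include <linux/dma-mapping.h>
#include <linux/dma-attrs.h>

static int example_alloc(struct device *example_dev, size_t size,
			 void **cpu_addr, dma_addr_t *dma_handle)
{
	DEFINE_DMA_ATTRS(attrs);

	/* Tell the allocator the CPU will never access this buffer. */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	*cpu_addr = dma_alloc_attrs(example_dev, size, dma_handle,
				    GFP_KERNEL, &attrs);
	return *cpu_addr ? 0 : -ENOMEM;
}

With DMA_ATTR_NO_KERNEL_MAPPING the returned pointer is only an opaque cookie to be handed back to dma_free_attrs() or dma_mmap_attrs(), not a kernel virtual address the driver may dereference.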
Diffstat (limited to 'arch/arm/mm')
 -rw-r--r--  arch/arm/mm/dma-mapping.c  561
 -rw-r--r--  arch/arm/mm/mm.h             3
2 files changed, 230 insertions(+), 334 deletions(-)
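Before the diff itself, a second hedged sketch of the other consumer-facing addition, dma_get_sgtable(): building a scatterlist for a coherent buffer so it can be handed to another device, the buffer-sharing case the pull request refers to. Again example_export() and its variables are placeholders, not code from this series.

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Hedged sketch: export a buffer previously obtained from dma_alloc_attrs(). */
static int example_export(struct device *dev, void *cpu_addr,
			  dma_addr_t dma_handle, size_t size)
{
	struct sg_table sgt;
	int ret;

	ret = dma_get_sgtable(dev, &sgt, cpu_addr, dma_handle, size);
	if (ret < 0)
		return ret;

	/* ... map sgt for the importing device, then release it ... */
	sg_free_table(&sgt);
	return 0;
}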
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5cfc98994076..c2cdf6500f75 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -22,6 +22,7 @@
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>

@@ -72,7 +73,7 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     struct dma_attrs *attrs)
 {
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 }
@@ -95,7 +96,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
 {
-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
 				      handle & ~PAGE_MASK, size, dir);
 }
@@ -124,6 +125,7 @@ struct dma_map_ops arm_dma_ops = {
 	.alloc = arm_dma_alloc,
 	.free = arm_dma_free,
 	.mmap = arm_dma_mmap,
+	.get_sgtable = arm_dma_get_sgtable,
 	.map_page = arm_dma_map_page,
 	.unmap_page = arm_dma_unmap_page,
 	.map_sg = arm_dma_map_sg,
@@ -217,115 +219,70 @@ static void __dma_free_buffer(struct page *page, size_t size)
 }

 #ifdef CONFIG_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#endif

-#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-
-/*
- * These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page);

-static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				 pgprot_t prot, struct page **ret_page,
+				 const void *caller);

-void __init init_consistent_dma_size(unsigned long size)
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+	const void *caller)
 {
-	unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
+	struct vm_struct *area;
+	unsigned long addr;

-	BUG_ON(consistent_pte); /* Check we're called before DMA region init */
-	BUG_ON(base < VMALLOC_END);
+	/*
+	 * DMA allocation can be mapped to user space, so lets
+	 * set VM_USERMAP flags too.
+	 */
+	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+				  caller);
+	if (!area)
+		return NULL;
+	addr = (unsigned long)area->addr;
+	area->phys_addr = __pfn_to_phys(page_to_pfn(page));

-	/* Grow region to accommodate specified size */
-	if (base < consistent_base)
-		consistent_base = base;
+	if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+	return (void *)addr;
 }

-#include "vmregion.h"
-
-static struct arm_vmregion_head consistent_head = {
-	.vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
-	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
-	.vm_end = CONSISTENT_END,
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
+static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-	int ret = 0;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int i = 0;
-	unsigned long base = consistent_base;
-	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
-
-	if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
-		return 0;
-
-	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
-	if (!consistent_pte) {
-		pr_err("%s: no memory\n", __func__);
-		return -ENOMEM;
+	unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
+	struct vm_struct *area = find_vm_area(cpu_addr);
+	if (!area || (area->flags & flags) != flags) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
 	}
-
-	pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
-	consistent_head.vm_start = base;
-
-	do {
-		pgd = pgd_offset(&init_mm, base);
-
-		pud = pud_alloc(&init_mm, pgd, base);
-		if (!pud) {
-			pr_err("%s: no pud tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		pmd = pmd_alloc(&init_mm, pud, base);
-		if (!pmd) {
-			pr_err("%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-		WARN_ON(!pmd_none(*pmd));
-
-		pte = pte_alloc_kernel(pmd, base);
-		if (!pte) {
-			pr_err("%s: no pte tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		consistent_pte[i++] = pte;
-		base += PMD_SIZE;
-	} while (base < CONSISTENT_END);
-
-	return ret;
+	unmap_kernel_range((unsigned long)cpu_addr, size);
+	vunmap(cpu_addr);
 }
-core_initcall(consistent_init);

-static void *__alloc_from_contiguous(struct device *dev, size_t size,
-				     pgprot_t prot, struct page **ret_page);
-
-static struct arm_vmregion_head coherent_head = {
-	.vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
-	.vm_list = LIST_HEAD_INIT(coherent_head.vm_list),
+struct dma_pool {
+	size_t size;
+	spinlock_t lock;
+	unsigned long *bitmap;
+	unsigned long nr_pages;
+	void *vaddr;
+	struct page *page;
 };

-static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+static struct dma_pool atomic_pool = {
+	.size = SZ_256K,
+};

 static int __init early_coherent_pool(char *p)
 {
-	coherent_pool_size = memparse(p, &p);
+	atomic_pool.size = memparse(p, &p);
 	return 0;
 }
 early_param("coherent_pool", early_coherent_pool);
@@ -333,32 +290,45 @@ early_param("coherent_pool", early_coherent_pool);
 /*
  * Initialise the coherent pool for atomic allocations.
  */
-static int __init coherent_init(void)
+static int __init atomic_pool_init(void)
 {
+	struct dma_pool *pool = &atomic_pool;
 	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
-	size_t size = coherent_pool_size;
+	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
+	unsigned long *bitmap;
 	struct page *page;
 	void *ptr;
+	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);

-	if (!IS_ENABLED(CONFIG_CMA))
-		return 0;
+	bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!bitmap)
+		goto no_bitmap;

-	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+	if (IS_ENABLED(CONFIG_CMA))
+		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
+	else
+		ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
+					   &page, NULL);
 	if (ptr) {
-		coherent_head.vm_start = (unsigned long) ptr;
-		coherent_head.vm_end = (unsigned long) ptr + size;
-		printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
-		       (unsigned)size / 1024);
+		spin_lock_init(&pool->lock);
+		pool->vaddr = ptr;
+		pool->page = page;
+		pool->bitmap = bitmap;
+		pool->nr_pages = nr_pages;
+		pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+			(unsigned)pool->size / 1024);
 		return 0;
 	}
-	printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
-	       (unsigned)size / 1024);
+	kfree(bitmap);
+no_bitmap:
+	pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+	       (unsigned)pool->size / 1024);
 	return -ENOMEM;
 }
 /*
  * CMA is activated by core_initcall, so we must be called after it.
  */
-postcore_initcall(coherent_init);
+postcore_initcall(atomic_pool_init);

 struct dma_contig_early_reserve {
 	phys_addr_t base;
@@ -406,112 +376,6 @@ void __init dma_contiguous_remap(void)
 	}
 }

-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
-	const void *caller)
-{
-	struct arm_vmregion *c;
-	size_t align;
-	int bit;
-
-	if (!consistent_pte) {
-		pr_err("%s: not initialised\n", __func__);
-		dump_stack();
-		return NULL;
-	}
-
-	/*
-	 * Align the virtual region allocation - maximum alignment is
-	 * a section size, minimum is a page size. This helps reduce
-	 * fragmentation of the DMA space, and also prevents allocations
-	 * smaller than a section from crossing a section boundary.
-	 */
-	bit = fls(size - 1);
-	if (bit > SECTION_SHIFT)
-		bit = SECTION_SHIFT;
-	align = 1 << bit;
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = arm_vmregion_alloc(&consistent_head, align, size,
-			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
-	if (c) {
-		pte_t *pte;
-		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-		pte = consistent_pte[idx] + off;
-		c->priv = page;
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			set_pte_ext(pte, mk_pte(page, prot), 0);
-			page++;
-			pte++;
-			off++;
-			if (off >= PTRS_PER_PTE) {
-				off = 0;
-				pte = consistent_pte[++idx];
-			}
-		} while (size -= PAGE_SIZE);
-
-		dsb();
-
-		return (void *)c->vm_start;
-	}
-	return NULL;
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
-	struct arm_vmregion *c;
-	unsigned long addr;
-	pte_t *ptep;
-	int idx;
-	u32 off;
-
-	c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
-	if (!c) {
-		pr_err("%s: trying to free invalid coherent area: %p\n",
-		       __func__, cpu_addr);
-		dump_stack();
-		return;
-	}
-
-	if ((c->vm_end - c->vm_start) != size) {
-		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
-	}
-
-	idx = CONSISTENT_PTE_INDEX(c->vm_start);
-	off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-	ptep = consistent_pte[idx] + off;
-	addr = c->vm_start;
-	do {
-		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-
-		ptep++;
-		addr += PAGE_SIZE;
-		off++;
-		if (off >= PTRS_PER_PTE) {
-			off = 0;
-			ptep = consistent_pte[++idx];
-		}
-
-		if (pte_none(pte) || !pte_present(pte))
-			pr_crit("%s: bad page in kernel page table\n",
-				__func__);
-	} while (size -= PAGE_SIZE);
-
-	flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-	arm_vmregion_free(&consistent_head, c);
-}
-
 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
 			    void *data)
 {
@@ -552,16 +416,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 	return ptr;
 }

-static void *__alloc_from_pool(struct device *dev, size_t size,
-		     struct page **ret_page, const void *caller)
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
 {
-	struct arm_vmregion *c;
+	struct dma_pool *pool = &atomic_pool;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int pageno;
+	unsigned long flags;
+	void *ptr = NULL;
 	size_t align;

-	if (!coherent_head.vm_start) {
-		printk(KERN_ERR "%s: coherent pool not initialised!\n",
-		       __func__);
-		dump_stack();
+	if (!pool->vaddr) {
+		WARN(1, "coherent pool not initialised!\n");
 		return NULL;
 	}

@@ -571,35 +436,41 @@ static void *__alloc_from_pool(struct device *dev, size_t size,
 	 * size. This helps reduce fragmentation of the DMA space.
 	 */
 	align = PAGE_SIZE << get_order(size);
-	c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
-	if (c) {
-		void *ptr = (void *)c->vm_start;
-		struct page *page = virt_to_page(ptr);
-		*ret_page = page;
-		return ptr;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
+					    0, count, (1 << align) - 1);
+	if (pageno < pool->nr_pages) {
+		bitmap_set(pool->bitmap, pageno, count);
+		ptr = pool->vaddr + PAGE_SIZE * pageno;
+		*ret_page = pool->page + pageno;
 	}
-	return NULL;
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	return ptr;
 }

-static int __free_from_pool(void *cpu_addr, size_t size)
+static int __free_from_pool(void *start, size_t size)
 {
-	unsigned long start = (unsigned long)cpu_addr;
-	unsigned long end = start + size;
-	struct arm_vmregion *c;
+	struct dma_pool *pool = &atomic_pool;
+	unsigned long pageno, count;
+	unsigned long flags;

-	if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+	if (start < pool->vaddr || start > pool->vaddr + pool->size)
 		return 0;

-	c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
-
-	if ((c->vm_end - c->vm_start) != size) {
-		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
+	if (start + size > pool->vaddr + pool->size) {
+		WARN(1, "freeing wrong coherent size from pool\n");
+		return 0;
 	}

-	arm_vmregion_free(&coherent_head, c);
+	pageno = (start - pool->vaddr) >> PAGE_SHIFT;
+	count = size >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	bitmap_clear(pool->bitmap, pageno, count);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
 	return 1;
 }

@@ -644,7 +515,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)

 #define __get_dma_pgprot(attrs, prot) __pgprot(0)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL
-#define __alloc_from_pool(dev, size, ret_page, c) NULL
+#define __alloc_from_pool(size, ret_page) NULL
 #define __alloc_from_contiguous(dev, size, prot, ret) NULL
 #define __free_from_pool(cpu_addr, size) 0
 #define __free_from_contiguous(dev, page, size) do { } while (0)
@@ -702,10 +573,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,

 	if (arch_is_coherent() || nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else if (gfp & GFP_ATOMIC)
+		addr = __alloc_from_pool(size, &page);
 	else if (!IS_ENABLED(CONFIG_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
-	else if (gfp & GFP_ATOMIC)
-		addr = __alloc_from_pool(dev, size, &page, caller);
 	else
 		addr = __alloc_from_contiguous(dev, size, prot, &page);

@@ -741,16 +612,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
 	int ret = -ENXIO;
 #ifdef CONFIG_MMU
+	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
+	unsigned long off = vma->vm_pgoff;
+
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);

 	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;

-	ret = remap_pfn_range(vma, vma->vm_start,
-			      pfn + vma->vm_pgoff,
-			      vma->vm_end - vma->vm_start,
-			      vma->vm_page_prot);
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
 #endif /* CONFIG_MMU */

 	return ret;
@@ -785,6 +662,21 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	}
 }

+int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+		 void *cpu_addr, dma_addr_t handle, size_t size,
+		 struct dma_attrs *attrs)
+{
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	int ret;
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return 0;
+}
+
 static void dma_cache_maint_page(struct page *page, unsigned long offset,
 	size_t size, enum dma_data_direction dir,
 	void (*op)(const void *, size_t, int))
@@ -998,9 +890,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)

 static int __init dma_debug_do_init(void)
 {
-#ifdef CONFIG_MMU
-	arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif
 	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 	return 0;
 }
@@ -1088,7 +977,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t

 	return pages;
 error:
-	while (--i)
+	while (i--)
 		if (pages[i])
 			__free_pages(pages[i], 0);
 	if (array_size <= PAGE_SIZE)
@@ -1117,61 +1006,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
  * Create a CPU mapping for a specified pages
  */
 static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+		    const void *caller)
 {
-	struct arm_vmregion *c;
-	size_t align;
-	size_t count = size >> PAGE_SHIFT;
-	int bit;
+	unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct vm_struct *area;
+	unsigned long p;

-	if (!consistent_pte[0]) {
-		pr_err("%s: not initialised\n", __func__);
-		dump_stack();
+	area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+				  caller);
+	if (!area)
 		return NULL;
-	}
-
-	/*
-	 * Align the virtual region allocation - maximum alignment is
-	 * a section size, minimum is a page size. This helps reduce
-	 * fragmentation of the DMA space, and also prevents allocations
-	 * smaller than a section from crossing a section boundary.
-	 */
-	bit = fls(size - 1);
-	if (bit > SECTION_SHIFT)
-		bit = SECTION_SHIFT;
-	align = 1 << bit;
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = arm_vmregion_alloc(&consistent_head, align, size,
-			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
-	if (c) {
-		pte_t *pte;
-		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-		int i = 0;
-		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-		pte = consistent_pte[idx] + off;
-		c->priv = pages;
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			set_pte_ext(pte, mk_pte(pages[i], prot), 0);
-			pte++;
-			off++;
-			i++;
-			if (off >= PTRS_PER_PTE) {
-				off = 0;
-				pte = consistent_pte[++idx];
-			}
-		} while (i < count);

-		dsb();
+	area->pages = pages;
+	area->nr_pages = nr_pages;
+	p = (unsigned long)area->addr;

-		return (void *)c->vm_start;
+	for (i = 0; i < nr_pages; i++) {
+		phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i]));
+		if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot))
+			goto err;
+		p += PAGE_SIZE;
 	}
+	return area->addr;
+err:
+	unmap_kernel_range((unsigned long)area->addr, size);
+	vunmap(area->addr);
 	return NULL;
 }

@@ -1230,6 +1090,19 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
 	return 0;
 }

+static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+{
+	struct vm_struct *area;
+
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return cpu_addr;
+
+	area = find_vm_area(cpu_addr);
+	if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+		return area->pages;
+	return NULL;
+}
+
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1248,7 +1121,11 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	if (*handle == DMA_ERROR_CODE)
 		goto err_buffer;

-	addr = __iommu_alloc_remap(pages, size, gfp, prot);
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return pages;
+
+	addr = __iommu_alloc_remap(pages, size, gfp, prot,
+				   __builtin_return_address(0));
 	if (!addr)
 		goto err_mapping;

@@ -1265,31 +1142,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		    struct dma_attrs *attrs)
 {
-	struct arm_vmregion *c;
+	unsigned long uaddr = vma->vm_start;
+	unsigned long usize = vma->vm_end - vma->vm_start;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);

 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-
-	if (c) {
-		struct page **pages = c->priv;
-
-		unsigned long uaddr = vma->vm_start;
-		unsigned long usize = vma->vm_end - vma->vm_start;
-		int i = 0;

-		do {
-			int ret;
+	if (!pages)
+		return -ENXIO;

-			ret = vm_insert_page(vma, uaddr, pages[i++]);
-			if (ret) {
-				pr_err("Remapping memory, error: %d\n", ret);
-				return ret;
-			}
+	do {
+		int ret = vm_insert_page(vma, uaddr, *pages++);
+		if (ret) {
+			pr_err("Remapping memory failed: %d\n", ret);
+			return ret;
+		}
+		uaddr += PAGE_SIZE;
+		usize -= PAGE_SIZE;
+	} while (usize > 0);

-			uaddr += PAGE_SIZE;
-			usize -= PAGE_SIZE;
-		} while (usize > 0);
-	}
 	return 0;
 }

@@ -1300,16 +1171,35 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			  dma_addr_t handle, struct dma_attrs *attrs)
 {
-	struct arm_vmregion *c;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
 	size = PAGE_ALIGN(size);

-	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-	if (c) {
-		struct page **pages = c->priv;
-		__dma_free_remap(cpu_addr, size);
-		__iommu_remove_mapping(dev, handle, size);
-		__iommu_free_buffer(dev, pages, size);
+	if (!pages) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
 	}
+
+	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+		unmap_kernel_range((unsigned long)cpu_addr, size);
+		vunmap(cpu_addr);
+	}
+
+	__iommu_remove_mapping(dev, handle, size);
+	__iommu_free_buffer(dev, pages, size);
+}
+
+static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+				 void *cpu_addr, dma_addr_t dma_addr,
+				 size_t size, struct dma_attrs *attrs)
+{
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+
+	if (!pages)
+		return -ENXIO;
+
+	return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
+					 GFP_KERNEL);
 }

 /*
@@ -1317,7 +1207,7 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
  */
 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 			  size_t size, dma_addr_t *handle,
-			  enum dma_data_direction dir)
+			  enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 	dma_addr_t iova, iova_base;
@@ -1336,7 +1226,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 		phys_addr_t phys = page_to_phys(sg_page(s));
 		unsigned int len = PAGE_ALIGN(s->offset + s->length);

-		if (!arch_is_coherent())
+		if (!arch_is_coherent() &&
+			!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);

 		ret = iommu_map(mapping->domain, iova, phys, len, 0);
@@ -1383,7 +1274,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,

 		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
 			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
-			    dir) < 0)
+			    dir, attrs) < 0)
 				goto bad_mapping;

 			dma->dma_address += offset;
@@ -1396,7 +1287,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		}
 		size += s->length;
 	}
-	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
+	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0)
 		goto bad_mapping;

 	dma->dma_address += offset;
@@ -1430,7 +1321,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		if (sg_dma_len(s))
 			__iommu_remove_mapping(dev, sg_dma_address(s),
 					       sg_dma_len(s));
-		if (!arch_is_coherent())
+		if (!arch_is_coherent() &&
+			!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 			__dma_page_dev_to_cpu(sg_page(s), s->offset,
 					      s->length, dir);
 	}
@@ -1492,7 +1384,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
 	dma_addr_t dma_addr;
 	int ret, len = PAGE_ALIGN(size + offset);

-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_cpu_to_dev(page, offset, size, dir);

 	dma_addr = __alloc_iova(mapping, len);
@@ -1531,7 +1423,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 	if (!iova)
 		return;

-	if (!arch_is_coherent())
+	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_dev_to_cpu(page, offset, size, dir);

 	iommu_unmap(mapping->domain, iova, len);
@@ -1571,6 +1463,7 @@ struct dma_map_ops iommu_ops = {
 	.alloc = arm_iommu_alloc_attrs,
 	.free = arm_iommu_free_attrs,
 	.mmap = arm_iommu_mmap_attrs,
+	.get_sgtable = arm_iommu_get_sgtable,

 	.map_page = arm_iommu_map_page,
 	.unmap_page = arm_iommu_unmap_page,
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 2e8a1efdf7b8..6776160618ef 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -59,6 +59,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
 #define VM_ARM_MTYPE(mt) ((mt) << 20)
 #define VM_ARM_MTYPE_MASK (0x1f << 20)

+/* consistent regions used by dma_alloc_attrs() */
+#define VM_ARM_DMA_CONSISTENT 0x20000000
+
 #endif

 #ifdef CONFIG_ZONE_DMA