author     Marek Szyprowski <m.szyprowski@samsung.com>   2011-12-29 07:09:51 -0500
committer  Marek Szyprowski <m.szyprowski@samsung.com>   2012-05-21 09:09:38 -0400
commit     c79095092834a18ae74cfc08def1a5a101dc106c (patch)
tree       c6cd81c38b92dcdb269288ab9a125bc13f4bb339
parent     0a2b9a6ea93650b8a00f9fd5ee8fdd25671e2df6 (diff)
ARM: integrate CMA with DMA-mapping subsystem
This patch adds support for CMA to the DMA-mapping subsystem for the ARM
architecture. By default a global CMA area is used, but specific devices
may have their own private memory areas if required (these can be
created with the dma_declare_contiguous() function during board
initialisation).
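
As an illustration of that board-initialisation hook, a machine's ->reserve()
callback could set up a private area roughly as in the sketch below. This is
not part of the patch: it assumes the dma_declare_contiguous() prototype added
by the CMA core patches in this series, and the board name and
example_video_device are made up for illustration.

/*
 * Hypothetical sketch: reserve a 16 MiB private CMA area for an
 * imaginary video device from the machine_desc ->reserve() callback.
 * Assumed prototype (from the CMA core patches):
 *   int dma_declare_contiguous(struct device *dev, unsigned long size,
 *                              phys_addr_t base, phys_addr_t limit);
 */
#include <linux/dma-contiguous.h>

static void __init exampleboard_reserve(void)
{
	/* 16 MiB, placed anywhere (base = 0), no address limit (limit = 0) */
	if (dma_declare_contiguous(&example_video_device.dev,
				   16 * 1024 * 1024, 0, 0))
		pr_warn("exampleboard: private CMA area reservation failed\n");
}

A device without a private area simply falls back to the default global CMA
region.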
Contiguous memory areas reserved for DMA are remapped with 2-level page
tables on boot. Once a buffer is requested, the low-memory kernel mapping
is updated to match the requested memory access type.
GFP_ATOMIC allocations are performed from a special pool which is created
early during boot. This way, remapping page attributes is not needed at
allocation time.
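
For comparison, a driver-side atomic allocation that would be served from this
pool is sketched below; it is just the standard dma_alloc_coherent() call, and
the device pointer and PAGE_SIZE length are illustrative. The pool itself can
be resized with the coherent_pool= boot parameter introduced by this patch
(for example coherent_pool=2M).

/*
 * Sketch of an IRQ-safe coherent allocation.  With this patch, on
 * ARMv6+ a GFP_ATOMIC request is carved out of the preallocated
 * coherent pool instead of remapping page attributes at allocation
 * time, which cannot be done in atomic context.
 */
#include <linux/dma-mapping.h>

static void *alloc_irq_safe_buffer(struct device *dev, dma_addr_t *dma)
{
	return dma_alloc_coherent(dev, PAGE_SIZE, dma, GFP_ATOMIC);
}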
CMA has been enabled unconditionally for ARMv6+ systems.
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
-rw-r--r--  Documentation/kernel-parameters.txt    |   4
-rw-r--r--  arch/arm/Kconfig                       |   2
-rw-r--r--  arch/arm/include/asm/dma-contiguous.h  |  15
-rw-r--r--  arch/arm/include/asm/mach/map.h        |   1
-rw-r--r--  arch/arm/kernel/setup.c                |   9
-rw-r--r--  arch/arm/mm/dma-mapping.c              | 370
-rw-r--r--  arch/arm/mm/init.c                     |  23
-rw-r--r--  arch/arm/mm/mm.h                       |   3
-rw-r--r--  arch/arm/mm/mmu.c                      |  31
9 files changed, 370 insertions, 88 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 669e8bb52b94..41996c68a5cd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -520,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
520 | a hypervisor. | 520 | a hypervisor. |
521 | Default: yes | 521 | Default: yes |
522 | 522 | ||
523 | coherent_pool=nn[KMG] [ARM,KNL] | ||
524 | Sets the size of memory pool for coherent, atomic dma | ||
525 | allocations if Contiguous Memory Allocator (CMA) is used. | ||
526 | |||
523 | code_bytes [X86] How many bytes of object code to print | 527 | code_bytes [X86] How many bytes of object code to print |
524 | in an oops report. | 528 | in an oops report. |
525 | Range: 0 - 8192 | 529 | Range: 0 - 8192 |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 36586dba6fa6..cbbbc45f6b67 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -4,6 +4,8 @@ config ARM | |||
4 | select HAVE_AOUT | 4 | select HAVE_AOUT |
5 | select HAVE_DMA_API_DEBUG | 5 | select HAVE_DMA_API_DEBUG |
6 | select HAVE_IDE if PCI || ISA || PCMCIA | 6 | select HAVE_IDE if PCI || ISA || PCMCIA |
7 | select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) | ||
8 | select CMA if (CPU_V6 || CPU_V6K || CPU_V7) | ||
7 | select HAVE_MEMBLOCK | 9 | select HAVE_MEMBLOCK |
8 | select RTC_LIB | 10 | select RTC_LIB |
9 | select SYS_SUPPORTS_APM_EMULATION | 11 | select SYS_SUPPORTS_APM_EMULATION |
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
new file mode 100644
index 000000000000..3ed37b4d93da
--- /dev/null
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -0,0 +1,15 @@ | |||
1 | #ifndef ASMARM_DMA_CONTIGUOUS_H | ||
2 | #define ASMARM_DMA_CONTIGUOUS_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #ifdef CONFIG_CMA | ||
6 | |||
7 | #include <linux/types.h> | ||
8 | #include <asm-generic/dma-contiguous.h> | ||
9 | |||
10 | void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); | ||
11 | |||
12 | #endif | ||
13 | #endif | ||
14 | |||
15 | #endif | ||
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index b36f3654bf54..a6efcdd6fd25 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -30,6 +30,7 @@ struct map_desc { | |||
30 | #define MT_MEMORY_DTCM 12 | 30 | #define MT_MEMORY_DTCM 12 |
31 | #define MT_MEMORY_ITCM 13 | 31 | #define MT_MEMORY_ITCM 13 |
32 | #define MT_MEMORY_SO 14 | 32 | #define MT_MEMORY_SO 14 |
33 | #define MT_MEMORY_DMA_READY 15 | ||
33 | 34 | ||
34 | #ifdef CONFIG_MMU | 35 | #ifdef CONFIG_MMU |
35 | extern void iotable_init(struct map_desc *, int); | 36 | extern void iotable_init(struct map_desc *, int); |
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ebfac782593f..1b3096dfb964 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup); | |||
81 | extern void paging_init(struct machine_desc *desc); | 81 | extern void paging_init(struct machine_desc *desc); |
82 | extern void sanity_check_meminfo(void); | 82 | extern void sanity_check_meminfo(void); |
83 | extern void reboot_setup(char *str); | 83 | extern void reboot_setup(char *str); |
84 | extern void setup_dma_zone(struct machine_desc *desc); | ||
84 | 85 | ||
85 | unsigned int processor_id; | 86 | unsigned int processor_id; |
86 | EXPORT_SYMBOL(processor_id); | 87 | EXPORT_SYMBOL(processor_id); |
@@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p) | |||
939 | machine_desc = mdesc; | 940 | machine_desc = mdesc; |
940 | machine_name = mdesc->name; | 941 | machine_name = mdesc->name; |
941 | 942 | ||
942 | #ifdef CONFIG_ZONE_DMA | 943 | setup_dma_zone(mdesc); |
943 | if (mdesc->dma_zone_size) { | 944 | |
944 | extern unsigned long arm_dma_zone_size; | ||
945 | arm_dma_zone_size = mdesc->dma_zone_size; | ||
946 | } | ||
947 | #endif | ||
948 | if (mdesc->restart_mode) | 945 | if (mdesc->restart_mode) |
949 | reboot_setup(&mdesc->restart_mode); | 946 | reboot_setup(&mdesc->restart_mode); |
950 | 947 | ||
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index db23ae4aaaab..302f5bfb17f4 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,7 +17,9 @@ | |||
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/device.h> | 18 | #include <linux/device.h> |
19 | #include <linux/dma-mapping.h> | 19 | #include <linux/dma-mapping.h> |
20 | #include <linux/dma-contiguous.h> | ||
20 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
22 | #include <linux/memblock.h> | ||
21 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
22 | 24 | ||
23 | #include <asm/memory.h> | 25 | #include <asm/memory.h> |
@@ -26,6 +28,9 @@ | |||
26 | #include <asm/tlbflush.h> | 28 | #include <asm/tlbflush.h> |
27 | #include <asm/sizes.h> | 29 | #include <asm/sizes.h> |
28 | #include <asm/mach/arch.h> | 30 | #include <asm/mach/arch.h> |
31 | #include <asm/mach/map.h> | ||
32 | #include <asm/system_info.h> | ||
33 | #include <asm/dma-contiguous.h> | ||
29 | 34 | ||
30 | #include "mm.h" | 35 | #include "mm.h" |
31 | 36 | ||
@@ -56,6 +61,19 @@ static u64 get_coherent_dma_mask(struct device *dev) | |||
56 | return mask; | 61 | return mask; |
57 | } | 62 | } |
58 | 63 | ||
64 | static void __dma_clear_buffer(struct page *page, size_t size) | ||
65 | { | ||
66 | void *ptr; | ||
67 | /* | ||
68 | * Ensure that the allocated pages are zeroed, and that any data | ||
69 | * lurking in the kernel direct-mapped region is invalidated. | ||
70 | */ | ||
71 | ptr = page_address(page); | ||
72 | memset(ptr, 0, size); | ||
73 | dmac_flush_range(ptr, ptr + size); | ||
74 | outer_flush_range(__pa(ptr), __pa(ptr) + size); | ||
75 | } | ||
76 | |||
59 | /* | 77 | /* |
60 | * Allocate a DMA buffer for 'dev' of size 'size' using the | 78 | * Allocate a DMA buffer for 'dev' of size 'size' using the |
61 | * specified gfp mask. Note that 'size' must be page aligned. | 79 | * specified gfp mask. Note that 'size' must be page aligned. |
@@ -64,23 +82,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf | |||
64 | { | 82 | { |
65 | unsigned long order = get_order(size); | 83 | unsigned long order = get_order(size); |
66 | struct page *page, *p, *e; | 84 | struct page *page, *p, *e; |
67 | void *ptr; | ||
68 | u64 mask = get_coherent_dma_mask(dev); | ||
69 | |||
70 | #ifdef CONFIG_DMA_API_DEBUG | ||
71 | u64 limit = (mask + 1) & ~mask; | ||
72 | if (limit && size >= limit) { | ||
73 | dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", | ||
74 | size, mask); | ||
75 | return NULL; | ||
76 | } | ||
77 | #endif | ||
78 | |||
79 | if (!mask) | ||
80 | return NULL; | ||
81 | |||
82 | if (mask < 0xffffffffULL) | ||
83 | gfp |= GFP_DMA; | ||
84 | 85 | ||
85 | page = alloc_pages(gfp, order); | 86 | page = alloc_pages(gfp, order); |
86 | if (!page) | 87 | if (!page) |
@@ -93,14 +94,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf | |||
93 | for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) | 94 | for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) |
94 | __free_page(p); | 95 | __free_page(p); |
95 | 96 | ||
96 | /* | 97 | __dma_clear_buffer(page, size); |
97 | * Ensure that the allocated pages are zeroed, and that any data | ||
98 | * lurking in the kernel direct-mapped region is invalidated. | ||
99 | */ | ||
100 | ptr = page_address(page); | ||
101 | memset(ptr, 0, size); | ||
102 | dmac_flush_range(ptr, ptr + size); | ||
103 | outer_flush_range(__pa(ptr), __pa(ptr) + size); | ||
104 | 98 | ||
105 | return page; | 99 | return page; |
106 | } | 100 | } |
@@ -170,6 +164,9 @@ static int __init consistent_init(void) | |||
170 | unsigned long base = consistent_base; | 164 | unsigned long base = consistent_base; |
171 | unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; | 165 | unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; |
172 | 166 | ||
167 | if (cpu_architecture() >= CPU_ARCH_ARMv6) | ||
168 | return 0; | ||
169 | |||
173 | consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); | 170 | consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); |
174 | if (!consistent_pte) { | 171 | if (!consistent_pte) { |
175 | pr_err("%s: no memory\n", __func__); | 172 | pr_err("%s: no memory\n", __func__); |
@@ -210,9 +207,101 @@ static int __init consistent_init(void) | |||
210 | 207 | ||
211 | return ret; | 208 | return ret; |
212 | } | 209 | } |
213 | |||
214 | core_initcall(consistent_init); | 210 | core_initcall(consistent_init); |
215 | 211 | ||
212 | static void *__alloc_from_contiguous(struct device *dev, size_t size, | ||
213 | pgprot_t prot, struct page **ret_page); | ||
214 | |||
215 | static struct arm_vmregion_head coherent_head = { | ||
216 | .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), | ||
217 | .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), | ||
218 | }; | ||
219 | |||
220 | size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; | ||
221 | |||
222 | static int __init early_coherent_pool(char *p) | ||
223 | { | ||
224 | coherent_pool_size = memparse(p, &p); | ||
225 | return 0; | ||
226 | } | ||
227 | early_param("coherent_pool", early_coherent_pool); | ||
228 | |||
229 | /* | ||
230 | * Initialise the coherent pool for atomic allocations. | ||
231 | */ | ||
232 | static int __init coherent_init(void) | ||
233 | { | ||
234 | pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); | ||
235 | size_t size = coherent_pool_size; | ||
236 | struct page *page; | ||
237 | void *ptr; | ||
238 | |||
239 | if (cpu_architecture() < CPU_ARCH_ARMv6) | ||
240 | return 0; | ||
241 | |||
242 | ptr = __alloc_from_contiguous(NULL, size, prot, &page); | ||
243 | if (ptr) { | ||
244 | coherent_head.vm_start = (unsigned long) ptr; | ||
245 | coherent_head.vm_end = (unsigned long) ptr + size; | ||
246 | printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", | ||
247 | (unsigned)size / 1024); | ||
248 | return 0; | ||
249 | } | ||
250 | printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", | ||
251 | (unsigned)size / 1024); | ||
252 | return -ENOMEM; | ||
253 | } | ||
254 | /* | ||
255 | * CMA is activated by core_initcall, so we must be called after it. | ||
256 | */ | ||
257 | postcore_initcall(coherent_init); | ||
258 | |||
259 | struct dma_contig_early_reserve { | ||
260 | phys_addr_t base; | ||
261 | unsigned long size; | ||
262 | }; | ||
263 | |||
264 | static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; | ||
265 | |||
266 | static int dma_mmu_remap_num __initdata; | ||
267 | |||
268 | void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) | ||
269 | { | ||
270 | dma_mmu_remap[dma_mmu_remap_num].base = base; | ||
271 | dma_mmu_remap[dma_mmu_remap_num].size = size; | ||
272 | dma_mmu_remap_num++; | ||
273 | } | ||
274 | |||
275 | void __init dma_contiguous_remap(void) | ||
276 | { | ||
277 | int i; | ||
278 | for (i = 0; i < dma_mmu_remap_num; i++) { | ||
279 | phys_addr_t start = dma_mmu_remap[i].base; | ||
280 | phys_addr_t end = start + dma_mmu_remap[i].size; | ||
281 | struct map_desc map; | ||
282 | unsigned long addr; | ||
283 | |||
284 | if (end > arm_lowmem_limit) | ||
285 | end = arm_lowmem_limit; | ||
286 | if (start >= end) | ||
287 | return; | ||
288 | |||
289 | map.pfn = __phys_to_pfn(start); | ||
290 | map.virtual = __phys_to_virt(start); | ||
291 | map.length = end - start; | ||
292 | map.type = MT_MEMORY_DMA_READY; | ||
293 | |||
294 | /* | ||
295 | * Clear previous low-memory mapping | ||
296 | */ | ||
297 | for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); | ||
298 | addr += PGDIR_SIZE) | ||
299 | pmd_clear(pmd_off_k(addr)); | ||
300 | |||
301 | iotable_init(&map, 1); | ||
302 | } | ||
303 | } | ||
304 | |||
216 | static void * | 305 | static void * |
217 | __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, | 306 | __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, |
218 | const void *caller) | 307 | const void *caller) |
@@ -319,20 +408,173 @@ static void __dma_free_remap(void *cpu_addr, size_t size) | |||
319 | arm_vmregion_free(&consistent_head, c); | 408 | arm_vmregion_free(&consistent_head, c); |
320 | } | 409 | } |
321 | 410 | ||
411 | static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, | ||
412 | void *data) | ||
413 | { | ||
414 | struct page *page = virt_to_page(addr); | ||
415 | pgprot_t prot = *(pgprot_t *)data; | ||
416 | |||
417 | set_pte_ext(pte, mk_pte(page, prot), 0); | ||
418 | return 0; | ||
419 | } | ||
420 | |||
421 | static void __dma_remap(struct page *page, size_t size, pgprot_t prot) | ||
422 | { | ||
423 | unsigned long start = (unsigned long) page_address(page); | ||
424 | unsigned end = start + size; | ||
425 | |||
426 | apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); | ||
427 | dsb(); | ||
428 | flush_tlb_kernel_range(start, end); | ||
429 | } | ||
430 | |||
431 | static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, | ||
432 | pgprot_t prot, struct page **ret_page, | ||
433 | const void *caller) | ||
434 | { | ||
435 | struct page *page; | ||
436 | void *ptr; | ||
437 | page = __dma_alloc_buffer(dev, size, gfp); | ||
438 | if (!page) | ||
439 | return NULL; | ||
440 | |||
441 | ptr = __dma_alloc_remap(page, size, gfp, prot, caller); | ||
442 | if (!ptr) { | ||
443 | __dma_free_buffer(page, size); | ||
444 | return NULL; | ||
445 | } | ||
446 | |||
447 | *ret_page = page; | ||
448 | return ptr; | ||
449 | } | ||
450 | |||
451 | static void *__alloc_from_pool(struct device *dev, size_t size, | ||
452 | struct page **ret_page, const void *caller) | ||
453 | { | ||
454 | struct arm_vmregion *c; | ||
455 | size_t align; | ||
456 | |||
457 | if (!coherent_head.vm_start) { | ||
458 | printk(KERN_ERR "%s: coherent pool not initialised!\n", | ||
459 | __func__); | ||
460 | dump_stack(); | ||
461 | return NULL; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * Align the region allocation - allocations from pool are rather | ||
466 | * small, so align them to their order in pages, minimum is a page | ||
467 | * size. This helps reduce fragmentation of the DMA space. | ||
468 | */ | ||
469 | align = PAGE_SIZE << get_order(size); | ||
470 | c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); | ||
471 | if (c) { | ||
472 | void *ptr = (void *)c->vm_start; | ||
473 | struct page *page = virt_to_page(ptr); | ||
474 | *ret_page = page; | ||
475 | return ptr; | ||
476 | } | ||
477 | return NULL; | ||
478 | } | ||
479 | |||
480 | static int __free_from_pool(void *cpu_addr, size_t size) | ||
481 | { | ||
482 | unsigned long start = (unsigned long)cpu_addr; | ||
483 | unsigned long end = start + size; | ||
484 | struct arm_vmregion *c; | ||
485 | |||
486 | if (start < coherent_head.vm_start || end > coherent_head.vm_end) | ||
487 | return 0; | ||
488 | |||
489 | c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); | ||
490 | |||
491 | if ((c->vm_end - c->vm_start) != size) { | ||
492 | printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", | ||
493 | __func__, c->vm_end - c->vm_start, size); | ||
494 | dump_stack(); | ||
495 | size = c->vm_end - c->vm_start; | ||
496 | } | ||
497 | |||
498 | arm_vmregion_free(&coherent_head, c); | ||
499 | return 1; | ||
500 | } | ||
501 | |||
502 | static void *__alloc_from_contiguous(struct device *dev, size_t size, | ||
503 | pgprot_t prot, struct page **ret_page) | ||
504 | { | ||
505 | unsigned long order = get_order(size); | ||
506 | size_t count = size >> PAGE_SHIFT; | ||
507 | struct page *page; | ||
508 | |||
509 | page = dma_alloc_from_contiguous(dev, count, order); | ||
510 | if (!page) | ||
511 | return NULL; | ||
512 | |||
513 | __dma_clear_buffer(page, size); | ||
514 | __dma_remap(page, size, prot); | ||
515 | |||
516 | *ret_page = page; | ||
517 | return page_address(page); | ||
518 | } | ||
519 | |||
520 | static void __free_from_contiguous(struct device *dev, struct page *page, | ||
521 | size_t size) | ||
522 | { | ||
523 | __dma_remap(page, size, pgprot_kernel); | ||
524 | dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); | ||
525 | } | ||
526 | |||
527 | #define nommu() 0 | ||
528 | |||
322 | #else /* !CONFIG_MMU */ | 529 | #else /* !CONFIG_MMU */ |
323 | 530 | ||
324 | #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) | 531 | #define nommu() 1 |
325 | #define __dma_free_remap(addr, size) do { } while (0) | 532 | |
533 | #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL | ||
534 | #define __alloc_from_pool(dev, size, ret_page, c) NULL | ||
535 | #define __alloc_from_contiguous(dev, size, prot, ret) NULL | ||
536 | #define __free_from_pool(cpu_addr, size) 0 | ||
537 | #define __free_from_contiguous(dev, page, size) do { } while (0) | ||
538 | #define __dma_free_remap(cpu_addr, size) do { } while (0) | ||
326 | 539 | ||
327 | #endif /* CONFIG_MMU */ | 540 | #endif /* CONFIG_MMU */ |
328 | 541 | ||
329 | static void * | 542 | static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, |
330 | __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | 543 | struct page **ret_page) |
331 | pgprot_t prot, const void *caller) | ||
332 | { | 544 | { |
333 | struct page *page; | 545 | struct page *page; |
546 | page = __dma_alloc_buffer(dev, size, gfp); | ||
547 | if (!page) | ||
548 | return NULL; | ||
549 | |||
550 | *ret_page = page; | ||
551 | return page_address(page); | ||
552 | } | ||
553 | |||
554 | |||
555 | |||
556 | static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | ||
557 | gfp_t gfp, pgprot_t prot, const void *caller) | ||
558 | { | ||
559 | u64 mask = get_coherent_dma_mask(dev); | ||
560 | struct page *page; | ||
334 | void *addr; | 561 | void *addr; |
335 | 562 | ||
563 | #ifdef CONFIG_DMA_API_DEBUG | ||
564 | u64 limit = (mask + 1) & ~mask; | ||
565 | if (limit && size >= limit) { | ||
566 | dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", | ||
567 | size, mask); | ||
568 | return NULL; | ||
569 | } | ||
570 | #endif | ||
571 | |||
572 | if (!mask) | ||
573 | return NULL; | ||
574 | |||
575 | if (mask < 0xffffffffULL) | ||
576 | gfp |= GFP_DMA; | ||
577 | |||
336 | /* | 578 | /* |
337 | * Following is a work-around (a.k.a. hack) to prevent pages | 579 | * Following is a work-around (a.k.a. hack) to prevent pages |
338 | * with __GFP_COMP being passed to split_page() which cannot | 580 | * with __GFP_COMP being passed to split_page() which cannot |
@@ -345,19 +587,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
345 | *handle = ~0; | 587 | *handle = ~0; |
346 | size = PAGE_ALIGN(size); | 588 | size = PAGE_ALIGN(size); |
347 | 589 | ||
348 | page = __dma_alloc_buffer(dev, size, gfp); | 590 | if (arch_is_coherent() || nommu()) |
349 | if (!page) | 591 | addr = __alloc_simple_buffer(dev, size, gfp, &page); |
350 | return NULL; | 592 | else if (cpu_architecture() < CPU_ARCH_ARMv6) |
351 | 593 | addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); | |
352 | if (!arch_is_coherent()) | 594 | else if (gfp & GFP_ATOMIC) |
353 | addr = __dma_alloc_remap(page, size, gfp, prot, caller); | 595 | addr = __alloc_from_pool(dev, size, &page, caller); |
354 | else | 596 | else |
355 | addr = page_address(page); | 597 | addr = __alloc_from_contiguous(dev, size, prot, &page); |
356 | 598 | ||
357 | if (addr) | 599 | if (addr) |
358 | *handle = pfn_to_dma(dev, page_to_pfn(page)); | 600 | *handle = pfn_to_dma(dev, page_to_pfn(page)); |
359 | else | ||
360 | __dma_free_buffer(page, size); | ||
361 | 601 | ||
362 | return addr; | 602 | return addr; |
363 | } | 603 | } |
@@ -366,8 +606,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, | |||
366 | * Allocate DMA-coherent memory space and return both the kernel remapped | 606 | * Allocate DMA-coherent memory space and return both the kernel remapped |
367 | * virtual and bus address for that space. | 607 | * virtual and bus address for that space. |
368 | */ | 608 | */ |
369 | void * | 609 | void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, |
370 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) | 610 | gfp_t gfp) |
371 | { | 611 | { |
372 | void *memory; | 612 | void *memory; |
373 | 613 | ||
@@ -398,25 +638,11 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, | |||
398 | { | 638 | { |
399 | int ret = -ENXIO; | 639 | int ret = -ENXIO; |
400 | #ifdef CONFIG_MMU | 640 | #ifdef CONFIG_MMU |
401 | unsigned long user_size, kern_size; | 641 | unsigned long pfn = dma_to_pfn(dev, dma_addr); |
402 | struct arm_vmregion *c; | 642 | ret = remap_pfn_range(vma, vma->vm_start, |
403 | 643 | pfn + vma->vm_pgoff, | |
404 | user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 644 | vma->vm_end - vma->vm_start, |
405 | 645 | vma->vm_page_prot); | |
406 | c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); | ||
407 | if (c) { | ||
408 | unsigned long off = vma->vm_pgoff; | ||
409 | |||
410 | kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; | ||
411 | |||
412 | if (off < kern_size && | ||
413 | user_size <= (kern_size - off)) { | ||
414 | ret = remap_pfn_range(vma, vma->vm_start, | ||
415 | page_to_pfn(c->vm_pages) + off, | ||
416 | user_size << PAGE_SHIFT, | ||
417 | vma->vm_page_prot); | ||
418 | } | ||
419 | } | ||
420 | #endif /* CONFIG_MMU */ | 646 | #endif /* CONFIG_MMU */ |
421 | 647 | ||
422 | return ret; | 648 | return ret; |
@@ -438,23 +664,33 @@ int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, | |||
438 | } | 664 | } |
439 | EXPORT_SYMBOL(dma_mmap_writecombine); | 665 | EXPORT_SYMBOL(dma_mmap_writecombine); |
440 | 666 | ||
667 | |||
441 | /* | 668 | /* |
442 | * free a page as defined by the above mapping. | 669 | * Free a buffer as defined by the above mapping. |
443 | * Must not be called with IRQs disabled. | ||
444 | */ | 670 | */ |
445 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) | 671 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) |
446 | { | 672 | { |
447 | WARN_ON(irqs_disabled()); | 673 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); |
448 | 674 | ||
449 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) | 675 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) |
450 | return; | 676 | return; |
451 | 677 | ||
452 | size = PAGE_ALIGN(size); | 678 | size = PAGE_ALIGN(size); |
453 | 679 | ||
454 | if (!arch_is_coherent()) | 680 | if (arch_is_coherent() || nommu()) { |
681 | __dma_free_buffer(page, size); | ||
682 | } else if (cpu_architecture() < CPU_ARCH_ARMv6) { | ||
455 | __dma_free_remap(cpu_addr, size); | 683 | __dma_free_remap(cpu_addr, size); |
456 | 684 | __dma_free_buffer(page, size); | |
457 | __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); | 685 | } else { |
686 | if (__free_from_pool(cpu_addr, size)) | ||
687 | return; | ||
688 | /* | ||
689 | * Non-atomic allocations cannot be freed with IRQs disabled | ||
690 | */ | ||
691 | WARN_ON(irqs_disabled()); | ||
692 | __free_from_contiguous(dev, page, size); | ||
693 | } | ||
458 | } | 694 | } |
459 | EXPORT_SYMBOL(dma_free_coherent); | 695 | EXPORT_SYMBOL(dma_free_coherent); |
460 | 696 | ||
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8f5813bbffb5..c21d06c7dd7e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/highmem.h> | 20 | #include <linux/highmem.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/memblock.h> | 22 | #include <linux/memblock.h> |
23 | #include <linux/dma-contiguous.h> | ||
23 | 24 | ||
24 | #include <asm/mach-types.h> | 25 | #include <asm/mach-types.h> |
25 | #include <asm/memblock.h> | 26 | #include <asm/memblock.h> |
@@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, | |||
226 | } | 227 | } |
227 | #endif | 228 | #endif |
228 | 229 | ||
230 | void __init setup_dma_zone(struct machine_desc *mdesc) | ||
231 | { | ||
232 | #ifdef CONFIG_ZONE_DMA | ||
233 | if (mdesc->dma_zone_size) { | ||
234 | arm_dma_zone_size = mdesc->dma_zone_size; | ||
235 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
236 | } else | ||
237 | arm_dma_limit = 0xffffffff; | ||
238 | #endif | ||
239 | } | ||
240 | |||
229 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | 241 | static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, |
230 | unsigned long max_high) | 242 | unsigned long max_high) |
231 | { | 243 | { |
@@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, | |||
273 | * Adjust the sizes according to any special requirements for | 285 | * Adjust the sizes according to any special requirements for |
274 | * this machine type. | 286 | * this machine type. |
275 | */ | 287 | */ |
276 | if (arm_dma_zone_size) { | 288 | if (arm_dma_zone_size) |
277 | arm_adjust_dma_zone(zone_size, zhole_size, | 289 | arm_adjust_dma_zone(zone_size, zhole_size, |
278 | arm_dma_zone_size >> PAGE_SHIFT); | 290 | arm_dma_zone_size >> PAGE_SHIFT); |
279 | arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; | ||
280 | } else | ||
281 | arm_dma_limit = 0xffffffff; | ||
282 | #endif | 291 | #endif |
283 | 292 | ||
284 | free_area_init_node(0, zone_size, min, zhole_size); | 293 | free_area_init_node(0, zone_size, min, zhole_size); |
@@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) | |||
364 | if (mdesc->reserve) | 373 | if (mdesc->reserve) |
365 | mdesc->reserve(); | 374 | mdesc->reserve(); |
366 | 375 | ||
376 | /* | ||
377 | * reserve memory for DMA contigouos allocations, | ||
378 | * must come from DMA area inside low memory | ||
379 | */ | ||
380 | dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); | ||
381 | |||
367 | arm_memblock_steal_permitted = false; | 382 | arm_memblock_steal_permitted = false; |
368 | memblock_allow_resize(); | 383 | memblock_allow_resize(); |
369 | memblock_dump_all(); | 384 | memblock_dump_all(); |
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 27f4a619b35d..93dc0c17cdcb 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -67,5 +67,8 @@ extern u32 arm_dma_limit; | |||
67 | #define arm_dma_limit ((u32)~0) | 67 | #define arm_dma_limit ((u32)~0) |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | extern phys_addr_t arm_lowmem_limit; | ||
71 | |||
70 | void __init bootmem_init(void); | 72 | void __init bootmem_init(void); |
71 | void arm_mm_memblock_reserve(void); | 73 | void arm_mm_memblock_reserve(void); |
74 | void dma_contiguous_remap(void); | ||
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index aa78de8bfdd3..e5dad60b558b 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -288,6 +288,11 @@ static struct mem_type mem_types[] = { | |||
288 | PMD_SECT_UNCACHED | PMD_SECT_XN, | 288 | PMD_SECT_UNCACHED | PMD_SECT_XN, |
289 | .domain = DOMAIN_KERNEL, | 289 | .domain = DOMAIN_KERNEL, |
290 | }, | 290 | }, |
291 | [MT_MEMORY_DMA_READY] = { | ||
292 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, | ||
293 | .prot_l1 = PMD_TYPE_TABLE, | ||
294 | .domain = DOMAIN_KERNEL, | ||
295 | }, | ||
291 | }; | 296 | }; |
292 | 297 | ||
293 | const struct mem_type *get_mem_type(unsigned int type) | 298 | const struct mem_type *get_mem_type(unsigned int type) |
@@ -429,6 +434,7 @@ static void __init build_mem_type_table(void) | |||
429 | if (arch_is_coherent() && cpu_is_xsc3()) { | 434 | if (arch_is_coherent() && cpu_is_xsc3()) { |
430 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 435 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
431 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 436 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
437 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
432 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 438 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
433 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 439 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
434 | } | 440 | } |
@@ -460,6 +466,7 @@ static void __init build_mem_type_table(void) | |||
460 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; | 466 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; |
461 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; | 467 | mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; |
462 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; | 468 | mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; |
469 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; | ||
463 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; | 470 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; |
464 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; | 471 | mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; |
465 | } | 472 | } |
@@ -512,6 +519,7 @@ static void __init build_mem_type_table(void) | |||
512 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; | 519 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; |
513 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; | 520 | mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; |
514 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; | 521 | mem_types[MT_MEMORY].prot_pte |= kern_pgprot; |
522 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; | ||
515 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; | 523 | mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; |
516 | mem_types[MT_ROM].prot_sect |= cp->pmd; | 524 | mem_types[MT_ROM].prot_sect |= cp->pmd; |
517 | 525 | ||
@@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, | |||
596 | * L1 entries, whereas PGDs refer to a group of L1 entries making | 604 | * L1 entries, whereas PGDs refer to a group of L1 entries making |
597 | * up one logical pointer to an L2 table. | 605 | * up one logical pointer to an L2 table. |
598 | */ | 606 | */ |
599 | if (((addr | end | phys) & ~SECTION_MASK) == 0) { | 607 | if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { |
600 | pmd_t *p = pmd; | 608 | pmd_t *p = pmd; |
601 | 609 | ||
602 | #ifndef CONFIG_ARM_LPAE | 610 | #ifndef CONFIG_ARM_LPAE |
@@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg) | |||
814 | } | 822 | } |
815 | early_param("vmalloc", early_vmalloc); | 823 | early_param("vmalloc", early_vmalloc); |
816 | 824 | ||
817 | static phys_addr_t lowmem_limit __initdata = 0; | 825 | phys_addr_t arm_lowmem_limit __initdata = 0; |
818 | 826 | ||
819 | void __init sanity_check_meminfo(void) | 827 | void __init sanity_check_meminfo(void) |
820 | { | 828 | { |
@@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void) | |||
897 | bank->size = newsize; | 905 | bank->size = newsize; |
898 | } | 906 | } |
899 | #endif | 907 | #endif |
900 | if (!bank->highmem && bank->start + bank->size > lowmem_limit) | 908 | if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) |
901 | lowmem_limit = bank->start + bank->size; | 909 | arm_lowmem_limit = bank->start + bank->size; |
902 | 910 | ||
903 | j++; | 911 | j++; |
904 | } | 912 | } |
@@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void) | |||
923 | } | 931 | } |
924 | #endif | 932 | #endif |
925 | meminfo.nr_banks = j; | 933 | meminfo.nr_banks = j; |
926 | high_memory = __va(lowmem_limit - 1) + 1; | 934 | high_memory = __va(arm_lowmem_limit - 1) + 1; |
927 | memblock_set_current_limit(lowmem_limit); | 935 | memblock_set_current_limit(arm_lowmem_limit); |
928 | } | 936 | } |
929 | 937 | ||
930 | static inline void prepare_page_table(void) | 938 | static inline void prepare_page_table(void) |
@@ -949,8 +957,8 @@ static inline void prepare_page_table(void) | |||
949 | * Find the end of the first block of lowmem. | 957 | * Find the end of the first block of lowmem. |
950 | */ | 958 | */ |
951 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; | 959 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; |
952 | if (end >= lowmem_limit) | 960 | if (end >= arm_lowmem_limit) |
953 | end = lowmem_limit; | 961 | end = arm_lowmem_limit; |
954 | 962 | ||
955 | /* | 963 | /* |
956 | * Clear out all the kernel space mappings, except for the first | 964 | * Clear out all the kernel space mappings, except for the first |
@@ -1093,8 +1101,8 @@ static void __init map_lowmem(void) | |||
1093 | phys_addr_t end = start + reg->size; | 1101 | phys_addr_t end = start + reg->size; |
1094 | struct map_desc map; | 1102 | struct map_desc map; |
1095 | 1103 | ||
1096 | if (end > lowmem_limit) | 1104 | if (end > arm_lowmem_limit) |
1097 | end = lowmem_limit; | 1105 | end = arm_lowmem_limit; |
1098 | if (start >= end) | 1106 | if (start >= end) |
1099 | break; | 1107 | break; |
1100 | 1108 | ||
@@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc) | |||
1115 | { | 1123 | { |
1116 | void *zero_page; | 1124 | void *zero_page; |
1117 | 1125 | ||
1118 | memblock_set_current_limit(lowmem_limit); | 1126 | memblock_set_current_limit(arm_lowmem_limit); |
1119 | 1127 | ||
1120 | build_mem_type_table(); | 1128 | build_mem_type_table(); |
1121 | prepare_page_table(); | 1129 | prepare_page_table(); |
1122 | map_lowmem(); | 1130 | map_lowmem(); |
1131 | dma_contiguous_remap(); | ||
1123 | devicemaps_init(mdesc); | 1132 | devicemaps_init(mdesc); |
1124 | kmap_init(); | 1133 | kmap_init(); |
1125 | 1134 | ||