Diffstat (limited to 'arch/arm64/mm'):

 arch/arm64/mm/cache.S       |  80
 arch/arm64/mm/dma-mapping.c | 246
 arch/arm64/mm/init.c        |  33
 arch/arm64/mm/proc.S        |  14

 4 files changed, 334 insertions(+), 39 deletions(-)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 1ea9f26d1b70..c46f48b33c14 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -30,7 +30,7 @@
  *
  *	Corrupted registers: x0-x7, x9-x11
  */
-ENTRY(__flush_dcache_all)
+__flush_dcache_all:
 	dsb	sy				// ensure ordering with previous memory accesses
 	mrs	x0, clidr_el1			// read clidr
 	and	x3, x0, #0x7000000		// extract loc from clidr
@@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area)
 	dsb	sy
 	ret
 ENDPROC(__flush_dcache_area)
+
+/*
+ *	__dma_inv_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_inv_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+	bic	x1, x1, x3
+1:	dc	ivac, x0			// invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_inv_range)
+
+/*
+ *	__dma_clean_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_clean_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	cvac, x0			// clean D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_clean_range)
+
+/*
+ *	__dma_flush_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_flush_range)
+
+/*
+ *	__dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_map_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_FROM_DEVICE
+	b.eq	__dma_inv_range
+	b	__dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ *	__dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_unmap_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_TO_DEVICE
+	b.ne	__dma_inv_range
+	ret
+ENDPROC(__dma_unmap_area)
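
Note: the __dma_map_area/__dma_unmap_area entry points encode the arm64 DMA ownership rules. Mapping for DMA_FROM_DEVICE (the device will write) invalidates stale cache lines; mapping for any other direction cleans dirty lines out to memory so the device reads current data; on unmap, only a buffer the device may have written needs a second invalidate. A minimal C sketch of the same dispatch, treating the __dma_*_range assembly routines above as external helpers (illustrative, not part of the patch):

/* Sketch only: models the direction checks in __dma_map_area/__dma_unmap_area. */
#include <stddef.h>

enum dma_data_direction {
	DMA_BIDIRECTIONAL = 0,
	DMA_TO_DEVICE = 1,
	DMA_FROM_DEVICE = 2,
};

/* stand-ins for the assembly routines defined in cache.S above */
extern void __dma_inv_range(const void *start, const void *end);
extern void __dma_clean_range(const void *start, const void *end);

static void dma_map_area_sketch(const void *start, size_t size,
				enum dma_data_direction dir)
{
	const char *end = (const char *)start + size;

	if (dir == DMA_FROM_DEVICE)	/* device will write: drop stale lines */
		__dma_inv_range(start, end);
	else				/* device will read: push dirty lines to memory */
		__dma_clean_range(start, end);
}

static void dma_unmap_area_sketch(const void *start, size_t size,
				  enum dma_data_direction dir)
{
	/* DMA_TO_DEVICE needs nothing on unmap; everything else re-invalidates */
	if (dir != DMA_TO_DEVICE)
		__dma_inv_range(start, (const char *)start + size);
}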
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index fbd76785c5db..0ba347e59f06 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -30,18 +30,26 @@
 struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
-					  dma_addr_t *dma_handle, gfp_t flags,
-					  struct dma_attrs *attrs)
+static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+				 bool coherent)
+{
+	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+		return pgprot_writecombine(prot);
+	return prot;
+}
+
+static void *__dma_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *dma_handle, gfp_t flags,
+				  struct dma_attrs *attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
 		return NULL;
 	}
 
-	if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
-		flags |= GFP_DMA32;
+		flags |= GFP_DMA;
 	if (IS_ENABLED(CONFIG_DMA_CMA)) {
 		struct page *page;
 
@@ -58,9 +66,9 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 	}
 }
 
-static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
-					void *vaddr, dma_addr_t dma_handle,
-					struct dma_attrs *attrs)
+static void __dma_free_coherent(struct device *dev, size_t size,
+				void *vaddr, dma_addr_t dma_handle,
+				struct dma_attrs *attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -78,9 +86,212 @@ static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
 	}
 }
 
-static struct dma_map_ops arm64_swiotlb_dma_ops = {
-	.alloc		= arm64_swiotlb_alloc_coherent,
-	.free		= arm64_swiotlb_free_coherent,
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+				     dma_addr_t *dma_handle, gfp_t flags,
+				     struct dma_attrs *attrs)
+{
+	struct page *page, **map;
+	void *ptr, *coherent_ptr;
+	int order, i;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+	if (!ptr)
+		goto no_mem;
+	map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+	if (!map)
+		goto no_map;
+
+	/* remove any dirty cache lines on the kernel alias */
+	__dma_flush_range(ptr, ptr + size);
+
+	/* create a coherent mapping */
+	page = virt_to_page(ptr);
+	for (i = 0; i < (size >> PAGE_SHIFT); i++)
+		map[i] = page + i;
+	coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+			    __get_dma_pgprot(attrs, pgprot_default, false));
+	kfree(map);
+	if (!coherent_ptr)
+		goto no_map;
+
+	return coherent_ptr;
+
+no_map:
+	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+	*dma_handle = ~0;
+	return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+				   void *vaddr, dma_addr_t dma_handle,
+				   struct dma_attrs *attrs)
+{
+	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+	vunmap(vaddr);
+	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	dma_addr_t dev_addr;
+
+	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+	return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+				 size_t size, enum dma_data_direction dir,
+				 struct dma_attrs *attrs)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+				  int nelems, enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i, ret;
+
+	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+	for_each_sg(sgl, sg, ret, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+
+	return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+				     struct scatterlist *sgl, int nelems,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+					  dma_addr_t dev_addr, size_t size,
+					  enum dma_data_direction dir)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+					     dma_addr_t dev_addr, size_t size,
+					     enum dma_data_direction dir)
+{
+	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+				      struct scatterlist *sgl, int nelems,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+					 struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+}
+
+/* vma->vm_page_prot must be set appropriately before calling this function */
+static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
+					PAGE_SHIFT;
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
+static int __swiotlb_mmap_noncoherent(struct device *dev,
+		struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs)
+{
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
+	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static int __swiotlb_mmap_coherent(struct device *dev,
+		struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs)
+{
+	/* Just use whatever page_prot attributes were specified */
+	return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+	.alloc			= __dma_alloc_noncoherent,
+	.free			= __dma_free_noncoherent,
+	.mmap			= __swiotlb_mmap_noncoherent,
+	.map_page		= __swiotlb_map_page,
+	.unmap_page		= __swiotlb_unmap_page,
+	.map_sg			= __swiotlb_map_sg_attrs,
+	.unmap_sg		= __swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu	= __swiotlb_sync_single_for_cpu,
+	.sync_single_for_device	= __swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu	= __swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device	= __swiotlb_sync_sg_for_device,
+	.dma_supported		= swiotlb_dma_supported,
+	.mapping_error		= swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
+	.alloc			= __dma_alloc_coherent,
+	.free			= __dma_free_coherent,
+	.mmap			= __swiotlb_mmap_coherent,
 	.map_page	= swiotlb_map_page,
 	.unmap_page	= swiotlb_unmap_page,
 	.map_sg		= swiotlb_map_sg_attrs,
@@ -92,12 +303,19 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = {
 	.dma_supported	= swiotlb_dma_supported,
 	.mapping_error	= swiotlb_dma_mapping_error,
 };
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
+
+extern int swiotlb_late_init_with_default_size(size_t default_size);
 
-void __init arm64_swiotlb_init(void)
+static int __init swiotlb_late_init(void)
 {
-	dma_ops = &arm64_swiotlb_dma_ops;
-	swiotlb_init(1);
+	size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
+
+	dma_ops = &coherent_swiotlb_dma_ops;
+
+	return swiotlb_late_init_with_default_size(swiotlb_size);
 }
+subsys_initcall(swiotlb_late_init);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES	4096
 
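
Note: the non-coherent allocator relies on double-mapping. __dma_alloc_noncoherent() takes memory from the coherent (swiotlb-backed) allocator, flushes the cacheable kernel linear alias so no dirty lines can later be evicted over the DMA'd data, then vmap()s the same physical pages with write-combine attributes and returns that non-cacheable alias. A condensed sketch of the remapping step, assuming GFP_KERNEL and PAGE_KERNEL where the patch threads through the caller's gfp flags and __get_dma_pgprot() (illustrative only):

/*
 * Sketch only: the double-mapping step inside __dma_alloc_noncoherent().
 * Assumes GFP_KERNEL/PAGE_KERNEL; the patch itself uses the caller's
 * flags and the pgprot computed by __get_dma_pgprot().
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *remap_writecombine_sketch(void *linear_ptr, size_t size)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct page *first = virt_to_page(linear_ptr);
	struct page **pages;
	void *wc_ptr;
	unsigned int i;

	pages = kmalloc(count * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;
	for (i = 0; i < count; i++)	/* the allocation is physically contiguous */
		pages[i] = first + i;

	/* second virtual alias, Normal Non-cacheable (write-combine) */
	wc_ptr = vmap(pages, count, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
	kfree(pages);			/* vmap() does not keep the array */
	return wc_ptr;			/* release later with vunmap() */
}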
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d0b4c2efda90..88627c450a6c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include <linux/memblock.h>
 #include <linux/sort.h>
 #include <linux/of_fdt.h>
+#include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
 
 #include <asm/sections.h>
@@ -59,22 +60,22 @@ static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
 	struct memblock_region *reg;
 	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-	unsigned long max_dma32 = min;
+	unsigned long max_dma = min;
 
 	memset(zone_size, 0, sizeof(zone_size));
 
-#ifdef CONFIG_ZONE_DMA32
 	/* 4GB maximum for 32-bit only capable devices */
-	max_dma32 = max(min, min(max, MAX_DMA32_PFN));
-	zone_size[ZONE_DMA32] = max_dma32 - min;
-#endif
-	zone_size[ZONE_NORMAL] = max - max_dma32;
+	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+		unsigned long max_dma_phys =
+			(unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1);
+		max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT));
+		zone_size[ZONE_DMA] = max_dma - min;
+	}
+	zone_size[ZONE_NORMAL] = max - max_dma;
 
 	memcpy(zhole_size, zone_size, sizeof(zhole_size));
 
@@ -84,15 +85,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 
 		if (start >= max)
 			continue;
-#ifdef CONFIG_ZONE_DMA32
-		if (start < max_dma32) {
-			unsigned long dma_end = min(end, max_dma32);
-			zhole_size[ZONE_DMA32] -= dma_end - start;
+
+		if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+			unsigned long dma_end = min(end, max_dma);
+			zhole_size[ZONE_DMA] -= dma_end - start;
 		}
-#endif
-		if (end > max_dma32) {
+
+		if (end > max_dma) {
 			unsigned long normal_end = min(end, max);
-			unsigned long normal_start = max(start, max_dma32);
+			unsigned long normal_start = max(start, max_dma);
 			zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
 		}
 	}
@@ -261,8 +262,6 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	arm64_swiotlb_init();
-
 	max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
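
Note: zone_sizes_init() now derives the ZONE_DMA boundary from the bus address space instead of a hard-coded 4GB physical constant: dma_to_phys(NULL, DMA_BIT_MASK(32) + 1) converts the first bus address beyond 32-bit reach into a physical address, which keeps the zone correct on platforms with a DMA offset. A standalone sketch of the clamping arithmetic, assuming 4K pages, an identity bus-to-physical translation and a made-up RAM layout from 2GB to 34GB (hypothetical values, not from the patch):

/*
 * Sketch only: the ZONE_DMA/ZONE_NORMAL split computed by zone_sizes_init().
 * Assumes 64-bit longs, 4K pages, dma_to_phys(NULL, x) == x, and invented
 * RAM boundaries.
 */
#include <stdio.h>

#define PAGE_SHIFT	12

int main(void)
{
	unsigned long min = 0x080000000UL >> PAGE_SHIFT;	/* first RAM pfn (2GB) */
	unsigned long max = 0x880000000UL >> PAGE_SHIFT;	/* end RAM pfn (34GB) */
	unsigned long dma_limit_pfn = (1UL << 32) >> PAGE_SHIFT; /* DMA_BIT_MASK(32) + 1 */

	/* max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)) */
	unsigned long max_dma = dma_limit_pfn < max ? dma_limit_pfn : max;
	if (max_dma < min)
		max_dma = min;

	printf("ZONE_DMA:    %lu pages (%lu MB)\n",
	       max_dma - min, (max_dma - min) >> (20 - PAGE_SHIFT));
	printf("ZONE_NORMAL: %lu pages (%lu MB)\n",
	       max - max_dma, (max - max_dma) >> (20 - PAGE_SHIFT));
	return 0;	/* prints 2048 MB of ZONE_DMA, 30720 MB of ZONE_NORMAL */
}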
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 1333e6f9a8e5..e085ee6ef4e2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -173,12 +173,6 @@ ENDPROC(cpu_do_switch_mm)
  *	value of the SCTLR_EL1 register.
  */
 ENTRY(__cpu_setup)
-	/*
-	 * Preserve the link register across the function call.
-	 */
-	mov	x28, lr
-	bl	__flush_dcache_all
-	mov	lr, x28
 	ic	iallu				// I+BTB cache invalidate
 	tlbi	vmalle1is			// invalidate I + D TLBs
 	dsb	sy
@@ -215,8 +209,14 @@ ENTRY(__cpu_setup)
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
 	 * both user and kernel.
 	 */
-	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
 		      TCR_ASID16 | TCR_TBI0 | (1 << 31)
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1.
+	 */
+	mrs	x9, ID_AA64MMFR0_EL1
+	bfi	x10, x9, #32, #3
 #ifdef CONFIG_ARM64_64K_PAGES
 	orr	x10, x10, TCR_TG0_64K
 	orr	x10, x10, TCR_TG1_64K
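
Note: the TCR change replaces the hard-coded TCR_IPS_40BIT with a value probed from the CPU. "mrs x9, ID_AA64MMFR0_EL1" reads the memory model feature register, and "bfi x10, x9, #32, #3" copies its low three bits, the PARange field, into bits [34:32] of the TCR image, i.e. the IPS (intermediate physical address size) field. A C model of that bitfield insert, using a hypothetical PARange value for illustration:

/*
 * Sketch only: C equivalent of "bfi x10, x9, #32, #3", inserting
 * ID_AA64MMFR0_EL1.PARange (bits [2:0]) into TCR_EL1.IPS (bits [34:32]).
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t tcr_set_ips(uint64_t tcr, uint64_t mmfr0)
{
	uint64_t parange = mmfr0 & 0x7;	/* 0=32-bit PA, 1=36, 2=40, 3=42, 4=44, 5=48 */

	tcr &= ~(UINT64_C(0x7) << 32);	/* clear IPS */
	tcr |= parange << 32;		/* bfi x10, x9, #32, #3 */
	return tcr;
}

int main(void)
{
	uint64_t mmfr0 = 2;	/* hypothetical CPU reporting a 40-bit PA range */

	printf("TCR IPS bits: %#llx\n",	/* prints 0x200000000 */
	       (unsigned long long)tcr_set_ips(0, mmfr0));
	return 0;
}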