author		Gregory CLEMENT <gregory.clement@free-electrons.com>	2016-04-15 06:15:18 -0400
committer	Russell King <rmk+kernel@armlinux.org.uk>		2016-07-14 11:25:30 -0400
commit		f12708965069410691e47d1d216ec7ad1516bfd2
tree		84107954ba1e3dc2100dc8b9545fcab984f22523
parent		9f6f93543d473d656bdc5c94f567c7684e956e52
ARM: 8561/3: dma-mapping: Don't use outer_flush_range when the L2C is coherent
When an L2 cache controller is used in a system that provides hardware
coherency, the outer cache operations are entirely useless and can be
skipped. Moreover, on some systems they are harmful, as they cause
deadlocks between the Marvell coherency mechanism, the Marvell PCIe
controller and the Cortex-A9.

In the current kernel implementation, the outer cache flush range
operation is triggered by the dma_alloc function. This operation can
take place at runtime, and in some circumstances it may lead to the
PCIe/PL310 deadlock on Armada 375/38x SoCs.

This patch extends the __dma_clear_buffer() function to receive a flag
describing the coherency of the system. The same is done for the
calling functions.

Reported-by: Nadav Haklai <nadavh@marvell.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Cc: <stable@vger.kernel.org> # v3.16+
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--	arch/arm/mm/dma-mapping.c	62
1 file changed, 42 insertions(+), 20 deletions(-)
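The heart of the change is easier to read outside the diff format. Below is a condensed, illustrative sketch (not a verbatim copy of arch/arm/mm/dma-mapping.c) of what __dma_clear_buffer() does after this patch for the lowmem case; the highmem branch applies the same coherent_flag test around dmac_flush_range() and outer_flush_range() inside its kmap_atomic() loop:

#define NORMAL		0	/* non-coherent device: keep cache maintenance */
#define COHERENT	1	/* hardware-coherent device: skip cache maintenance */

/*
 * Condensed sketch: the buffer is always zeroed, but both the D-cache
 * flush and the outer (L2C/PL310) flush are skipped when the allocation
 * is for a coherent device.
 */
static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
{
	void *ptr = page_address(page);	/* lowmem; highmem loops with kmap_atomic() */

	memset(ptr, 0, size);
	if (coherent_flag != COHERENT) {
		dmac_flush_range(ptr, ptr + size);
		outer_flush_range(__pa(ptr), __pa(ptr) + size);
	}
}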
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ff7ed5697d3e..d2485c749ad5 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -49,6 +49,7 @@ struct arm_dma_alloc_args {
 	pgprot_t prot;
 	const void *caller;
 	bool want_vaddr;
+	int coherent_flag;
 };
 
 struct arm_dma_free_args {
@@ -59,6 +60,9 @@ struct arm_dma_free_args {
 	bool want_vaddr;
 };
 
+#define NORMAL	    0
+#define COHERENT    1
+
 struct arm_dma_allocator {
 	void *(*alloc)(struct arm_dma_alloc_args *args,
 		       struct page **ret_page);
@@ -272,7 +276,7 @@ static u64 get_coherent_dma_mask(struct device *dev)
 	return mask;
 }
 
-static void __dma_clear_buffer(struct page *page, size_t size)
+static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
 {
 	/*
 	 * Ensure that the allocated pages are zeroed, and that any data
@@ -284,17 +288,21 @@ static void __dma_clear_buffer(struct page *page, size_t size)
 		while (size > 0) {
 			void *ptr = kmap_atomic(page);
 			memset(ptr, 0, PAGE_SIZE);
-			dmac_flush_range(ptr, ptr + PAGE_SIZE);
+			if (coherent_flag != COHERENT)
+				dmac_flush_range(ptr, ptr + PAGE_SIZE);
 			kunmap_atomic(ptr);
 			page++;
 			size -= PAGE_SIZE;
 		}
-		outer_flush_range(base, end);
+		if (coherent_flag != COHERENT)
+			outer_flush_range(base, end);
 	} else {
 		void *ptr = page_address(page);
 		memset(ptr, 0, size);
-		dmac_flush_range(ptr, ptr + size);
-		outer_flush_range(__pa(ptr), __pa(ptr) + size);
+		if (coherent_flag != COHERENT) {
+			dmac_flush_range(ptr, ptr + size);
+			outer_flush_range(__pa(ptr), __pa(ptr) + size);
+		}
 	}
 }
 
@@ -302,7 +310,8 @@ static void __dma_clear_buffer(struct page *page, size_t size)
  * Allocate a DMA buffer for 'dev' of size 'size' using the
  * specified gfp mask. Note that 'size' must be page aligned.
  */
-static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
+static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
+				       gfp_t gfp, int coherent_flag)
 {
 	unsigned long order = get_order(size);
 	struct page *page, *p, *e;
@@ -318,7 +327,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 		__free_page(p);
 
-	__dma_clear_buffer(page, size);
+	__dma_clear_buffer(page, size, coherent_flag);
 
 	return page;
 }
@@ -340,7 +349,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller, bool want_vaddr);
+				     const void *caller, bool want_vaddr,
+				     int coherent_flag);
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
@@ -405,10 +415,13 @@ static int __init atomic_pool_init(void)
 	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
 	if (!atomic_pool)
 		goto out;
-
+	/*
+	 * The atomic pool is only used for non-coherent allocations
+	 * so we must pass NORMAL for coherent_flag.
+	 */
 	if (dev_get_cma_area(NULL))
 		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
-					      &page, atomic_pool_init, true);
+				      &page, atomic_pool_init, true, NORMAL);
 	else
 		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
 					   &page, atomic_pool_init, true);
@@ -522,7 +535,11 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 {
 	struct page *page;
 	void *ptr = NULL;
-	page = __dma_alloc_buffer(dev, size, gfp);
+	/*
+	 * __alloc_remap_buffer is only called when the device is
+	 * non-coherent
+	 */
+	page = __dma_alloc_buffer(dev, size, gfp, NORMAL);
 	if (!page)
 		return NULL;
 	if (!want_vaddr)
@@ -577,7 +594,8 @@ static int __free_from_pool(void *start, size_t size)
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
-				     const void *caller, bool want_vaddr)
+				     const void *caller, bool want_vaddr,
+				     int coherent_flag)
 {
 	unsigned long order = get_order(size);
 	size_t count = size >> PAGE_SHIFT;
@@ -588,7 +606,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	__dma_clear_buffer(page, size);
+	__dma_clear_buffer(page, size, coherent_flag);
 
 	if (!want_vaddr)
 		goto out;
@@ -638,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 #define __get_dma_pgprot(attrs, prot)				__pgprot(0)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
 #define __alloc_from_pool(size, ret_page)			NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv)	NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag)	NULL
 #define __free_from_pool(cpu_addr, size)			do { } while (0)
 #define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
 #define __dma_free_remap(cpu_addr, size)			do { } while (0)
@@ -649,7 +667,8 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 				   struct page **ret_page)
 {
 	struct page *page;
-	page = __dma_alloc_buffer(dev, size, gfp);
+	/* __alloc_simple_buffer is only called when the device is coherent */
+	page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
 	if (!page)
 		return NULL;
 
@@ -679,7 +698,7 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
 {
 	return __alloc_from_contiguous(args->dev, args->size, args->prot,
 				       ret_page, args->caller,
-				       args->want_vaddr);
+				       args->want_vaddr, args->coherent_flag);
 }
 
 static void cma_allocator_free(struct arm_dma_free_args *args)
@@ -746,6 +765,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		.prot = prot,
 		.caller = caller,
 		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+		.coherent_flag = is_coherent ? COHERENT : NORMAL,
 	};
 
 #ifdef CONFIG_DMA_API_DEBUG
@@ -1253,7 +1273,8 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
 static const int iommu_order_array[] = { 9, 8, 4, 0 };
 
 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
-					  gfp_t gfp, struct dma_attrs *attrs)
+					  gfp_t gfp, struct dma_attrs *attrs,
+					  int coherent_flag)
 {
 	struct page **pages;
 	int count = size >> PAGE_SHIFT;
@@ -1277,7 +1298,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 		if (!page)
 			goto error;
 
-		__dma_clear_buffer(page, size);
+		__dma_clear_buffer(page, size, coherent_flag);
 
 		for (i = 0; i < count; i++)
 			pages[i] = page + i;
@@ -1327,7 +1348,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 				pages[i + j] = pages[i] + j;
 		}
 
-		__dma_clear_buffer(pages[i], PAGE_SIZE << order);
+		__dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
 		i += 1 << order;
 		count -= 1 << order;
 	}
@@ -1505,7 +1526,8 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	 */
 	gfp &= ~(__GFP_COMP);
 
-	pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
+	/* For now always consider we are in a non-coherent case */
+	pages = __iommu_alloc_buffer(dev, size, gfp, attrs, NORMAL);
 	if (!pages)
 		return NULL;
 
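For context on how the flag reaches __dma_clear_buffer(), the sketch below condenses the call path touched by the hunks above. example_alloc() is a hypothetical helper used only for illustration (it is not part of the patch); everything else it refers to exists after this change: __dma_alloc() derives the flag from is_coherent, __alloc_simple_buffer() hardcodes COHERENT, __alloc_remap_buffer() and the IOMMU path use NORMAL, and __dma_alloc_buffer() forwards the flag.

/*
 * Illustrative only: example_alloc() is not in the patch; it just shows
 * how the callers above choose the flag that __dma_alloc_buffer()
 * forwards to __dma_clear_buffer().
 */
static void *example_alloc(struct device *dev, size_t size, gfp_t gfp,
			   bool is_coherent)
{
	int coherent_flag = is_coherent ? COHERENT : NORMAL;
	struct page *page;

	page = __dma_alloc_buffer(dev, size, gfp, coherent_flag);
	if (!page)
		return NULL;

	return page_address(page);
}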