author    Linus Torvalds <torvalds@linux-foundation.org>  2018-06-11 15:22:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-06-11 15:22:12 -0400
commit    e16c4790de39dc861b749674c2a9319507f6f64f
tree      ff59a163d1640813e6f2fc461888aa4167c2c282
parent    f7cca14bcec49989bcfe7cf2135ac719352d7022
Revert "iommu/amd_iommu: Use CONFIG_DMA_DIRECT_OPS=y and dma_direct_{alloc,free}()"
This reverts commit b468620f2a1dfdcfddfd6fa54367b8bcc1b51248.

It turns out that this broke drm on AMD platforms. Quoting Gabriel C:

  "I can confirm reverting b468620f2a1dfdcfddfd6fa54367b8bcc1b51248 fixes
   that issue for me.

   The GPU is working fine with SME enabled.

   Now with working GPU :) I can also confirm performance is back to
   normal without doing any other workarounds"

Christian König analyzed it partially:

  "As far as I analyzed it we now get an -ENOMEM from dma_alloc_attrs()
   in drivers/gpu/drm/ttm/ttm_page_alloc_dma.c when IOMMU is enabled"

and Christoph Hellwig responded:

  "I think the prime issue is that dma_direct_alloc respects the dma
   mask. Which we don't need if actually using the iommu. This would be
   mostly harmless except for the SEV bit high in the address that
   makes the checks fail.

   For now I'd say revert this commit for 4.17/4.18-rc and I'll look
   into addressing these issues properly"

Reported-and-bisected-by: Gabriel C <nix.or.die@gmail.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Christian König <christian.koenig@amd.com>
Cc: Michel Dänzer <michel.daenzer@amd.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@kernel.org # v4.17
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
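Christoph's point in code form: dma_direct_alloc() only returns memory whose DMA address fits the device's coherent DMA mask, and under SME/SEV the encryption bit is set high in the address, so that check fails even though the IOMMU would have remapped the buffer into the device's range anyway. Below is a minimal userspace sketch of the failing check, not kernel code; the C-bit position (bit 47) and the 40-bit device mask are assumed values for illustration, since the real C-bit position is CPU-specific.

/*
 * Sketch only (userspace, not kernel code).  dma_direct_alloc() rejects
 * memory whose DMA address exceeds the device's coherent DMA mask; with
 * SME/SEV the encryption bit sits high in the address, so the check
 * fails.  The C-bit position (47) and the 40-bit mask are assumptions
 * for illustration; the real C-bit position is reported by the CPU.
 */
#include <stdint.h>
#include <stdio.h>

#define SME_C_BIT	(1ULL << 47)		/* assumed encryption-bit position */
#define DMA_BIT_MASK(n)	((1ULL << (n)) - 1)	/* local stand-in for the kernel macro */

/* the gist of the fitness check behind the -ENOMEM Christian observed */
static int fits_coherent_mask(uint64_t dma_addr, uint64_t mask)
{
	return dma_addr <= mask;
}

int main(void)
{
	uint64_t phys = 0x12345000ULL;		/* some physical page address */
	uint64_t mask = DMA_BIT_MASK(40);	/* assumed device coherent mask */

	/* without memory encryption the page fits the mask */
	printf("plain: %s\n", fits_coherent_mask(phys, mask) ? "ok" : "-ENOMEM");

	/* with SME the C-bit pushes the address above any 40-bit mask */
	printf("SME:   %s\n", fits_coherent_mask(phys | SME_C_BIT, mask) ? "ok" : "-ENOMEM");
	return 0;
}

That mismatch is why the revert below goes back to alloc_pages() plus __map_single(): the IOMMU hands the device an I/O virtual address inside its mask regardless of where the backing pages live.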
-rw-r--r--  drivers/iommu/Kconfig      |  1
-rw-r--r--  drivers/iommu/amd_iommu.c  | 68
2 files changed, 47 insertions(+), 22 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 8ea77efb2e29..e055d228bfb9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,7 +107,6 @@ config IOMMU_PGTABLES_L2
 # AMD IOMMU support
 config AMD_IOMMU
 	bool "AMD IOMMU support"
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 	select PCI_MSI
 	select PCI_ATS
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0cea80be2888..596b95c50051 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2596,32 +2596,51 @@ static void *alloc_coherent(struct device *dev, size_t size,
 			    unsigned long attrs)
 {
 	u64 dma_mask = dev->coherent_dma_mask;
-	struct protection_domain *domain = get_domain(dev);
-	bool is_direct = false;
-	void *virt_addr;
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
+	struct page *page;
+
+	domain = get_domain(dev);
+	if (PTR_ERR(domain) == -EINVAL) {
+		page = alloc_pages(flag, get_order(size));
+		*dma_addr = page_to_phys(page);
+		return page_address(page);
+	} else if (IS_ERR(domain))
+		return NULL;
 
-	if (IS_ERR(domain)) {
-		if (PTR_ERR(domain) != -EINVAL)
+	dma_dom  = to_dma_ops_domain(domain);
+	size     = PAGE_ALIGN(size);
+	dma_mask = dev->coherent_dma_mask;
+	flag    &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+	flag    |= __GFP_ZERO;
+
+	page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
+	if (!page) {
+		if (!gfpflags_allow_blocking(flag))
 			return NULL;
-		is_direct = true;
-	}
 
-	virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
-	if (!virt_addr || is_direct)
-		return virt_addr;
+		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
+						 get_order(size), flag);
+		if (!page)
+			return NULL;
+	}
 
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	*dma_addr = __map_single(dev, to_dma_ops_domain(domain),
-			virt_to_phys(virt_addr), PAGE_ALIGN(size),
-			DMA_BIDIRECTIONAL, dma_mask);
+	*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
+				 size, DMA_BIDIRECTIONAL, dma_mask);
+
 	if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
 		goto out_free;
-	return virt_addr;
+
+	return page_address(page);
 
 out_free:
-	dma_direct_free(dev, size, virt_addr, *dma_addr, attrs);
+
+	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+		__free_pages(page, get_order(size));
+
 	return NULL;
 }
 
@@ -2632,17 +2651,24 @@ static void free_coherent(struct device *dev, size_t size,
 			  void *virt_addr, dma_addr_t dma_addr,
 			  unsigned long attrs)
 {
-	struct protection_domain *domain = get_domain(dev);
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
+	struct page *page;
 
+	page = virt_to_page(virt_addr);
 	size = PAGE_ALIGN(size);
 
-	if (!IS_ERR(domain)) {
-		struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain);
+	domain = get_domain(dev);
+	if (IS_ERR(domain))
+		goto free_mem;
 
-		__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
-	}
+	dma_dom = to_dma_ops_domain(domain);
+
+	__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	dma_direct_free(dev, size, virt_addr, dma_addr, attrs);
+free_mem:
+	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+		__free_pages(page, get_order(size));
 }
 
 /*