author    Linus Torvalds <torvalds@linux-foundation.org>  2019-09-19 16:27:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-09-19 16:27:23 -0400
commit    671df189537883f36cf9c7d4f9495bfac0f86627 (patch)
tree      22e5f598ed1f5d9b2218d85d4426140f804d61e6
parent    c9fe5630dae1df2328d82042602e2c4d1add8d57 (diff)
parent    c7d9eccb3c1e802c5cbb2a764eb0eb9807d9f12e (diff)
Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:

 - add dma-mapping and block layer helpers to take care of IOMMU merging
   for mmc plus subsequent fixups (Yoshihiro Shimoda)

 - rework handling of the pgprot bits for remapping (me)

 - take care of the dma direct infrastructure for swiotlb-xen (me)

 - improve the dma noncoherent remapping infrastructure (me)

 - better defaults for ->mmap, ->get_sgtable and ->get_required_mask (me)

 - cleanup mmaping of coherent DMA allocations (me)

 - various misc cleanups (Andy Shevchenko, me)

* tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits)
  mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE
  mmc: queue: Fix bigger segments usage
  arm64: use asm-generic/dma-mapping.h
  swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page
  swiotlb-xen: simplify cache maintainance
  swiotlb-xen: use the same foreign page check everywhere
  swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable
  xen: remove the exports for xen_{create,destroy}_contiguous_region
  xen/arm: remove xen_dma_ops
  xen/arm: simplify dma_cache_maint
  xen/arm: use dev_is_dma_coherent
  xen/arm: consolidate page-coherent.h
  xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance
  arm: remove wrappers for the generic dma remap helpers
  dma-mapping: introduce a dma_common_find_pages helper
  dma-mapping: always use VM_DMA_COHERENT for generic DMA remap
  vmalloc: lift the arm flag for coherent mappings to common code
  dma-mapping: provide a better default ->get_required_mask
  dma-mapping: remove the dma_declare_coherent_memory export
  remoteproc: don't allow modular build
  ...
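For context, a minimal sketch (not part of this merge) of how a block driver could consume the dma_get_merge_boundary() helper documented below when configuring its request queue; example_setup_queue() is a hypothetical name, and the series itself wires this up through dedicated mmc/block helpers rather than open-coding it like this:

	#include <linux/blkdev.h>
	#include <linux/dma-mapping.h>

	/* Let the block layer merge segments up to the IOMMU merge boundary. */
	static void example_setup_queue(struct request_queue *q, struct device *dev)
	{
		/* Returns 0 when the device cannot merge DMA address segments. */
		unsigned long boundary = dma_get_merge_boundary(dev);

		if (boundary)
			blk_queue_virt_boundary(q, boundary);
	}
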
-rw-r--r--  Documentation/DMA-API.txt | 19
-rw-r--r--  Documentation/x86/x86_64/boot-options.rst | 2
-rw-r--r--  arch/Kconfig | 3
-rw-r--r--  arch/alpha/kernel/pci_iommu.c | 2
-rw-r--r--  arch/arc/mm/dma.c | 6
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/include/asm/device.h | 3
-rw-r--r--  arch/arm/include/asm/dma-mapping.h | 6
-rw-r--r--  arch/arm/include/asm/pgtable-nommu.h | 1
-rw-r--r--  arch/arm/include/asm/xen/page-coherent.h | 93
-rw-r--r--  arch/arm/mm/dma-mapping-nommu.c | 5
-rw-r--r--  arch/arm/mm/dma-mapping.c | 84
-rw-r--r--  arch/arm/mm/mm.h | 3
-rw-r--r--  arch/arm/xen/mm.c | 129
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/include/asm/Kbuild | 1
-rw-r--r--  arch/arm64/include/asm/dma-mapping.h | 28
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 12
-rw-r--r--  arch/arm64/include/asm/xen/page-coherent.h | 75
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 16
-rw-r--r--  arch/c6x/Kconfig | 1
-rw-r--r--  arch/csky/mm/dma-mapping.c | 6
-rw-r--r--  arch/ia64/hp/common/sba_iommu.c | 2
-rw-r--r--  arch/ia64/kernel/setup.c | 2
-rw-r--r--  arch/m68k/Kconfig | 2
-rw-r--r--  arch/m68k/include/asm/pgtable_mm.h | 3
-rw-r--r--  arch/m68k/kernel/dma.c | 3
-rw-r--r--  arch/microblaze/Kconfig | 1
-rw-r--r--  arch/mips/Kconfig | 9
-rw-r--r--  arch/mips/jazz/jazzdma.c | 2
-rw-r--r--  arch/mips/mm/dma-noncoherent.c | 8
-rw-r--r--  arch/nds32/kernel/dma.c | 6
-rw-r--r--  arch/parisc/Kconfig | 1
-rw-r--r--  arch/powerpc/kernel/dma-iommu.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/system-bus.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 2
-rw-r--r--  arch/s390/pci/pci_dma.c | 2
-rw-r--r--  arch/sh/Kconfig | 1
-rw-r--r--  arch/unicore32/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/xen/page-coherent.h | 14
-rw-r--r--  arch/x86/kernel/amd_gart_64.c | 3
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 2
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 1
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/pci/sta2x11-fixup.c | 4
-rw-r--r--  arch/x86/xen/mmu_pv.c | 2
-rw-r--r--  arch/xtensa/Kconfig | 1
-rw-r--r--  arch/xtensa/kernel/pci-dma.c | 4
-rw-r--r--  block/blk-settings.c | 23
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/dispc.c | 11
-rw-r--r--  drivers/iommu/amd_iommu.c | 2
-rw-r--r--  drivers/iommu/dma-iommu.c | 29
-rw-r--r--  drivers/iommu/intel-iommu.c | 2
-rw-r--r--  drivers/mmc/core/queue.c | 41
-rw-r--r--  drivers/mmc/host/renesas_sdhi_internal_dmac.c | 2
-rw-r--r--  drivers/parisc/ccio-dma.c | 1
-rw-r--r--  drivers/parisc/sba_iommu.c | 1
-rw-r--r--  drivers/remoteproc/Kconfig | 2
-rw-r--r--  drivers/xen/swiotlb-xen.c | 84
-rw-r--r--  include/linux/blkdev.h | 2
-rw-r--r--  include/linux/dma-mapping.h | 34
-rw-r--r--  include/linux/dma-noncoherent.h | 13
-rw-r--r--  include/linux/mmc/host.h | 2
-rw-r--r--  include/linux/vmalloc.h | 2
-rw-r--r--  include/xen/arm/hypervisor.h | 2
-rw-r--r--  include/xen/arm/page-coherent.h | 24
-rw-r--r--  include/xen/swiotlb-xen.h | 5
-rw-r--r--  kernel/dma/Kconfig | 12
-rw-r--r--  kernel/dma/coherent.c | 13
-rw-r--r--  kernel/dma/mapping.c | 105
-rw-r--r--  kernel/dma/remap.c | 51
-rw-r--r--  mm/vmalloc.c | 5
-rw-r--r--  sound/core/pcm_native.c | 13
73 files changed, 397 insertions(+), 674 deletions(-)
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index e47c63bd4887..2d8d2fed7317 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter
 of the mapping functions like dma_map_single(), dma_map_page() and
 others should not be larger than the returned value.
 
+::
+
+	unsigned long
+	dma_get_merge_boundary(struct device *dev);
+
+Returns the DMA merge boundary. If the device cannot merge any the DMA address
+segments, the function returns 0.
+
 Part Id - Streaming DMA mappings
 --------------------------------
 
@@ -595,17 +603,6 @@ For reasons of efficiency, most platforms choose to track the declared
 region only at the granularity of a page. For smaller allocations,
 you should use the dma_pool() API.
 
-::
-
-	void
-	dma_release_declared_memory(struct device *dev)
-
-Remove the memory region previously declared from the system. This
-API performs *no* in-use checking for this region and will return
-unconditionally having removed all the required structures. It is the
-driver's job to ensure that no parts of this memory region are
-currently in use.
-
 Part III - Debug drivers use of the DMA-API
 -------------------------------------------
 
diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
index 6a4285a3c7a4..2b98efb5ba7f 100644
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -230,7 +230,7 @@ IOMMU (input/output memory management unit)
 ===========================================
 Multiple x86-64 PCI-DMA mapping implementations exist, for example:
 
-   1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
+   1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
      (e.g. because you have < 3 GB memory).
      Kernel boot message: "PCI-DMA: Disabling IOMMU"
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 6baedab10dca..c4b2afa138ca 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -793,9 +793,6 @@ config COMPAT_32BIT_TIME
	  This is relevant on all 32-bit architectures, and 64-bit architectures
	  as part of compat syscall handling.
 
-config ARCH_NO_COHERENT_DMA_MMAP
-	bool
-
 config ARCH_NO_PREEMPT
 	bool
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 242108439f42..7f1925a32c99 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -955,5 +955,7 @@ const struct dma_map_ops alpha_pci_ops = {
 	.map_sg = alpha_pci_map_sg,
 	.unmap_sg = alpha_pci_unmap_sg,
 	.dma_supported = alpha_pci_supported,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 70a3fbe79fba..73a7e88a1e92 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	dev_info(dev, "use %scoherent DMA ops\n",
 		 dev->dma_coherent ? "" : "non");
 }
-
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2ae7f8adcac4..aa1d3b25e89f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,7 +8,7 @@ config ARM
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
-	select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB
+	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_KEEPINITRD
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index f6955b55c544..c675bc0d5aa8 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -15,9 +15,6 @@ struct dev_archdata {
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 	struct dma_iommu_mapping *mapping;
 #endif
-#ifdef CONFIG_XEN
-	const struct dma_map_ops *dev_dma_ops;
-#endif
 	unsigned int dma_coherent:1;
 	unsigned int dma_ops_setup:1;
 };
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index dba9355e2484..bdd80ddbca34 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
 }
 #endif
 
-/* do not use this function in a driver */
-static inline bool is_device_dma_coherent(struct device *dev)
-{
-	return dev->archdata.dma_coherent;
-}
-
 /**
  * arm_dma_alloc - allocate consistent memory for DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 0b1f6799a32e..d0de24f06724 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t;
  */
 #define pgprot_noncached(prot)	(prot)
 #define pgprot_writecombine(prot)	(prot)
-#define pgprot_dmacoherent(prot)	(prot)
 #define pgprot_device(prot)	(prot)
 
 
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 2c403e7c782d..27e984977402 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -1,95 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
-#define _ASM_ARM_XEN_PAGE_COHERENT_H
-
-#include <linux/dma-mapping.h>
-#include <asm/page.h>
 #include <xen/arm/page-coherent.h>
-
-static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
-{
-	if (dev && dev->archdata.dev_dma_ops)
-		return dev->archdata.dev_dma_ops;
-	return get_arch_dma_ops(NULL);
-}
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-	return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-	xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-		dma_addr_t dev_addr, unsigned long offset, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long page_pfn = page_to_xen_pfn(page);
-	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-	unsigned long compound_pages =
-		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
-	bool local = (page_pfn <= dev_pfn) &&
-		(dev_pfn - page_pfn < compound_pages);
-
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can span across
-	 * multiple Xen pages, it's not possible for it to contain a
-	 * mix of local and foreign Xen pages. So if the first xen_pfn
-	 * == mfn the page is local otherwise it's a foreign page
-	 * grant-mapped in dom0. If the page is local we can safely
-	 * call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
-	if (local)
-		xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
-	else
-		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
-	 * foreign mfn will always return false. If the page is local we can
-	 * safely call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
-	if (pfn_valid(pfn)) {
-		if (xen_get_dma_ops(hwdev)->unmap_page)
-			xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
-	} else
-		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn)) {
-		if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
-			xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
-	} else
-		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn)) {
-		if (xen_get_dma_ops(hwdev)->sync_single_for_device)
-			xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
-	} else
-		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 52b82559d99b..db9247898300 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -68,8 +68,9 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 
 	if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
 		return ret;
-
-	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+	return -ENXIO;
 }
 
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d42557ee69c2..7d042d5c43e3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/dma-contiguous.h>
@@ -35,6 +36,7 @@
 #include <asm/mach/map.h>
 #include <asm/system_info.h>
 #include <asm/dma-contiguous.h>
+#include <xen/swiotlb-xen.h>
 
 #include "dma.h"
 #include "mm.h"
@@ -192,6 +194,7 @@ const struct dma_map_ops arm_dma_ops = {
 	.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
 	.sync_sg_for_device = arm_dma_sync_sg_for_device,
 	.dma_supported = arm_dma_supported,
+	.get_required_mask = dma_direct_get_required_mask,
 };
 EXPORT_SYMBOL(arm_dma_ops);
 
@@ -212,6 +215,7 @@ const struct dma_map_ops arm_coherent_dma_ops = {
 	.map_sg = arm_dma_map_sg,
 	.map_resource = dma_direct_map_resource,
 	.dma_supported = arm_dma_supported,
+	.get_required_mask = dma_direct_get_required_mask,
 };
 EXPORT_SYMBOL(arm_coherent_dma_ops);
 
@@ -336,25 +340,6 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 		pgprot_t prot, struct page **ret_page,
 		const void *caller, bool want_vaddr);
 
-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
-	const void *caller)
-{
-	/*
-	 * DMA allocation can be mapped to user space, so lets
-	 * set VM_USERMAP flags too.
-	 */
-	return dma_common_contiguous_remap(page, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP,
-			prot, caller);
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
-	dma_common_free_remap(cpu_addr, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
-}
-
 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
 static struct gen_pool *atomic_pool __ro_after_init;
 
@@ -510,7 +495,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 	if (!want_vaddr)
 		goto out;
 
-	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	ptr = dma_common_contiguous_remap(page, size, prot, caller);
 	if (!ptr) {
 		__dma_free_buffer(page, size);
 		return NULL;
@@ -577,7 +562,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 		goto out;
 
 	if (PageHighMem(page)) {
-		ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
+		ptr = dma_common_contiguous_remap(page, size, prot, caller);
 		if (!ptr) {
 			dma_release_from_contiguous(dev, page, count);
 			return NULL;
@@ -597,7 +582,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
 {
 	if (want_vaddr) {
 		if (PageHighMem(page))
-			__dma_free_remap(cpu_addr, size);
+			dma_common_free_remap(cpu_addr, size);
 		else
 			__dma_remap(page, size, PAGE_KERNEL);
 	}
@@ -689,7 +674,7 @@ static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
 static void remap_allocator_free(struct arm_dma_free_args *args)
 {
 	if (args->want_vaddr)
-		__dma_free_remap(args->cpu_addr, args->size);
+		dma_common_free_remap(args->cpu_addr, args->size);
 
 	__dma_free_buffer(args->page, args->size);
 }
@@ -877,17 +862,6 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
 	__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
 }
 
-/*
- * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
- * that the intention is to allow exporting memory allocated via the
- * coherent DMA APIs through the dma_buf API, which only accepts a
- * scattertable. This presents a couple of problems:
- * 1. Not all memory allocated via the coherent DMA APIs is backed by
- *    a struct page
- * 2. Passing coherent DMA memory into the streaming APIs is not allowed
- *    as we will try to flush the memory through a different alias to that
- *    actually being used (and the flushes are redundant.)
- */
 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 		void *cpu_addr, dma_addr_t handle, size_t size,
 		unsigned long attrs)
@@ -1132,10 +1106,6 @@ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
	 * 32-bit DMA.
	 * Use the generic dma-direct / swiotlb ops code in that case, as that
	 * handles bounce buffering for us.
-	 *
-	 * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the
-	 * latter is also selected by the Xen code, but that code for now relies
-	 * on non-NULL dev_dma_ops. To be cleaned up later.
	 */
 	if (IS_ENABLED(CONFIG_ARM_LPAE))
 		return NULL;
@@ -1373,17 +1343,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
 }
 
 /*
- * Create a CPU mapping for a specified pages
- */
-static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
-	const void *caller)
-{
-	return dma_common_pages_remap(pages, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
-}
-
-/*
  * Create a mapping in device IO address space for specified pages
  */
 static dma_addr_t
@@ -1455,18 +1414,13 @@ static struct page **__atomic_get_pages(void *addr)
 
 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
 {
-	struct vm_struct *area;
-
 	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
 		return __atomic_get_pages(cpu_addr);
 
 	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return cpu_addr;
 
-	area = find_vm_area(cpu_addr);
-	if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
-		return area->pages;
-	return NULL;
+	return dma_common_find_pages(cpu_addr);
 }
 
 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
@@ -1539,7 +1493,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return pages;
 
-	addr = __iommu_alloc_remap(pages, size, gfp, prot,
+	addr = dma_common_pages_remap(pages, size, prot,
 			__builtin_return_address(0));
 	if (!addr)
 		goto err_mapping;
@@ -1622,10 +1576,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
-		dma_common_free_remap(cpu_addr, size,
-				VM_ARM_DMA_CONSISTENT | VM_USERMAP);
-	}
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
+		dma_common_free_remap(cpu_addr, size);
 
 	__iommu_remove_mapping(dev, handle, size);
 	__iommu_free_buffer(dev, pages, size, attrs);
@@ -2363,10 +2315,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	set_dma_ops(dev, dma_ops);
 
 #ifdef CONFIG_XEN
-	if (xen_initial_domain()) {
-		dev->archdata.dev_dma_ops = dev->dma_ops;
-		dev->dma_ops = xen_dma_ops;
-	}
+	if (xen_initial_domain())
+		dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
 	dev->archdata.dma_ops_setup = true;
 }
@@ -2402,12 +2352,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 	return dma_to_pfn(dev, dma_addr);
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	return __get_dma_pgprot(attrs, prot);
-}
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 941356d95a67..88c121ac14b3 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
 #define VM_ARM_MTYPE(mt)	((mt) << 20)
 #define VM_ARM_MTYPE_MASK	(0x1f << 20)
 
-/* consistent regions used by dma_alloc_attrs() */
-#define VM_ARM_DMA_CONSISTENT	0x20000000
-
 
 struct static_vm {
 	struct vm_struct vm;
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index d33b77e9add3..2b2c208408bb 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/cpu.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
@@ -35,105 +35,56 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order)
 	return __get_free_pages(flags, order);
 }
 
-enum dma_cache_op {
-	DMA_UNMAP,
-	DMA_MAP,
-};
 static bool hypercall_cflush = false;
 
-/* functions called by SWIOTLB */
-
-static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
-		size_t size, enum dma_data_direction dir, enum dma_cache_op op)
+/* buffers in highmem or foreign pages cannot cross page boundaries */
+static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
 {
 	struct gnttab_cache_flush cflush;
-	unsigned long xen_pfn;
-	size_t left = size;
 
-	xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
-	offset %= XEN_PAGE_SIZE;
+	cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
+	cflush.offset = xen_offset_in_page(handle);
+	cflush.op = op;
 
 	do {
-		size_t len = left;
-
-		/* buffers in highmem or foreign pages cannot cross page
-		 * boundaries */
-		if (len + offset > XEN_PAGE_SIZE)
-			len = XEN_PAGE_SIZE - offset;
-
-		cflush.op = 0;
-		cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
-		cflush.offset = offset;
-		cflush.length = len;
-
-		if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
-			cflush.op = GNTTAB_CACHE_INVAL;
-		if (op == DMA_MAP) {
-			if (dir == DMA_FROM_DEVICE)
-				cflush.op = GNTTAB_CACHE_INVAL;
-			else
-				cflush.op = GNTTAB_CACHE_CLEAN;
-		}
-		if (cflush.op)
-			HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
+		if (size + cflush.offset > XEN_PAGE_SIZE)
+			cflush.length = XEN_PAGE_SIZE - cflush.offset;
+		else
+			cflush.length = size;
 
-		offset = 0;
-		xen_pfn++;
-		left -= len;
-	} while (left);
-}
+		HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
 
-static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
-{
-	dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
+		cflush.offset = 0;
+		cflush.a.dev_bus_addr += cflush.length;
+		size -= cflush.length;
+	} while (size);
 }
 
-static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
+/*
+ * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
+ * pages, it is not possible for it to contain a mix of local and foreign Xen
+ * pages. Calling pfn_valid on a foreign mfn will always return false, so if
+ * pfn_valid returns true the pages is local and we can use the native
+ * dma-direct functions, otherwise we call the Xen specific version.
+ */
+void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
 {
-	dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
+	if (pfn_valid(PFN_DOWN(handle)))
+		arch_sync_dma_for_cpu(dev, paddr, size, dir);
+	else if (dir != DMA_TO_DEVICE)
+		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
 }
 
-void __xen_dma_map_page(struct device *hwdev, struct page *page,
-	dma_addr_t dev_addr, unsigned long offset, size_t size,
-	enum dma_data_direction dir, unsigned long attrs)
+void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
 {
-	if (is_device_dma_coherent(hwdev))
-		return;
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
-}
-
-void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-	size_t size, enum dma_data_direction dir,
-	unsigned long attrs)
-
-{
-	if (is_device_dma_coherent(hwdev))
-		return;
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_cpu(struct device *hwdev,
-	dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	if (is_device_dma_coherent(hwdev))
-		return;
-	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_device(struct device *hwdev,
-	dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	if (is_device_dma_coherent(hwdev))
-		return;
-	__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
+	if (pfn_valid(PFN_DOWN(handle)))
+		arch_sync_dma_for_device(dev, paddr, size, dir);
+	else if (dir == DMA_FROM_DEVICE)
+		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
+	else
+		dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
 }
 
 bool xen_arch_need_swiotlb(struct device *dev,
@@ -159,7 +110,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
	 * memory and we are not able to flush the cache.
	 */
 	return (!hypercall_cflush && (xen_pfn != bfn) &&
-		!is_device_dma_coherent(dev));
+		!dev_is_dma_coherent(dev));
 }
 
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
@@ -173,16 +124,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 	*dma_handle = pstart;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 {
 	return;
 }
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
-
-const struct dma_map_ops *xen_dma_ops;
-EXPORT_SYMBOL(xen_dma_ops);
 
 int __init xen_mm_init(void)
 {
@@ -190,7 +136,6 @@ int __init xen_mm_init(void)
 	if (!xen_initial_domain())
 		return 0;
 	xen_swiotlb_init(1, false);
-	xen_dma_ops = &xen_swiotlb_dma_ops;
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6ae6ad8a4db0..835a1509882b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,7 +13,6 @@ config ARM64
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_COHERENT_TO_PFN
-	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index c52e151afab0..98a5405c8558 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += dma-contiguous.h
+generic-y += dma-mapping.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
 generic-y += hw_irq.h
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
deleted file mode 100644
index fb3e5044f473..000000000000
--- a/arch/arm64/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Ltd.
- */
-#ifndef __ASM_DMA_MAPPING_H
-#define __ASM_DMA_MAPPING_H
-
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-
-#include <xen/xen.h>
-#include <asm/xen/hypervisor.h>
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return NULL;
-}
-
-/*
- * Do not use this function in a driver, it is only provided for
- * arch/arm/mm/xen.c, which is used by arm64 as well.
- */
-static inline bool is_device_dma_coherent(struct device *dev)
-{
-	return dev->dma_coherent;
-}
-
-#endif /* __ASM_DMA_MAPPING_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 470ba7ae8821..57427d17580e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -437,6 +437,18 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_device(prot) \
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+/*
+ * DMA allocations for non-coherent devices use what the Arm architecture calls
+ * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
+ * and merging of writes. This is different from "Device-nGnR[nE]" memory which
+ * is intended for MMIO and thus forbids speculation, preserves access size,
+ * requires strict alignment and can also force write responses to come from the
+ * endpoint.
+ */
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
+			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
index d88e56b90b93..27e984977402 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -1,77 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
-#define _ASM_ARM64_XEN_PAGE_COHERENT_H
-
-#include <linux/dma-mapping.h>
-#include <asm/page.h>
 #include <xen/arm/page-coherent.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-	return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_device(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-		dma_addr_t dev_addr, unsigned long offset, size_t size,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long page_pfn = page_to_xen_pfn(page);
-	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-	unsigned long compound_pages =
-		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
-	bool local = (page_pfn <= dev_pfn) &&
-		(dev_pfn - page_pfn < compound_pages);
-
-	if (local)
-		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
-	else
-		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
-	 * foreign mfn will always return false. If the page is local we can
-	 * safely call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
-	if (pfn_valid(pfn))
-		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
-	else
-		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bd2b039f43a6..9239416e93d4 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -8,15 +8,11 @@
 #include <linux/cache.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/dma-iommu.h>
+#include <xen/xen.h>
+#include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	return pgprot_writecombine(prot);
-}
-
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 		size_t size, enum dma_data_direction dir)
 {
@@ -34,12 +30,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	__dma_flush_area(page_address(page), size);
 }
 
-static int __init arm64_dma_init(void)
-{
-	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
-}
-arch_initcall(arm64_dma_init);
-
 #ifdef CONFIG_IOMMU_DMA
 void arch_teardown_dma_ops(struct device *dev)
 {
@@ -64,6 +54,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 #ifdef CONFIG_XEN
 	if (xen_initial_domain())
-		dev->dma_ops = xen_dma_ops;
+		dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
 }
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index b4fb61c83494..e65e8d82442a 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -20,7 +20,6 @@ config C6X
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select ARCH_NO_COHERENT_DMA_MMAP
 	select MMU_GATHER_NO_RANGE if MMU
 
 config MMU
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 80783bb71c5c..602a60d47a94 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,12 +14,6 @@
 #include <linux/version.h>
 #include <asm/cache.h>
 
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
-
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	if (PageHighMem(page)) {
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index a7eff5e6d260..a806227c1fad 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2069,6 +2069,8 @@ static const struct dma_map_ops sba_dma_ops = {
 	.map_sg = sba_map_sg_attrs,
 	.unmap_sg = sba_unmap_sg_attrs,
 	.dma_supported = sba_dma_supported,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 
 static int __init
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 8eb276aac5ce..bb320c6d0cc9 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -259,7 +259,7 @@ __initcall(register_memory);
  * This function checks if the reserved crashkernel is allowed on the specific
  * IA64 machine flavour. Machines without an IO TLB use swiotlb and require
  * some memory below 4 GB (i.e. in 32 bit area), see the implementation of
- * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
+ * kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
  * in kdump case. See the comment in sba_init() in sba_iommu.c.
  *
  * So, the only machvec that really supports loading the kdump kernel
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c518d695c376..935599893d3e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,11 +4,9 @@ config M68K
 	default y
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_BINFMT_FLAT
-	select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
 	select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
-	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select ARCH_NO_PREEMPT if !COLDFIRE
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
 	select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fe3ddd73a0cc..fde4534b974f 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -169,6 +169,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
	 ? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \
	 : (prot)))
 
+pgprot_t pgprot_dmacoherent(pgprot_t prot);
+#define pgprot_dmacoherent(prot)	pgprot_dmacoherent(prot)
+
 #endif /* CONFIG_COLDFIRE */
 #include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 447849d1d645..3fab684cc0db 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -23,8 +23,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	cache_push(page_to_phys(page), size);
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
+pgprot_t pgprot_dmacoherent(pgprot_t prot)
 {
 	if (CPU_IS_040_OR_060) {
 		pgprot_val(prot) &= ~_PAGE_CACHE040;
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index d411de05b628..632c9477a0f6 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -9,7 +9,6 @@ config MICROBLAZE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select TIMER_OF
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d50fafd7bf3a..aff1cadeea43 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1119,7 +1119,14 @@ config DMA_PERDEV_COHERENT
 
 config DMA_NONCOHERENT
 	bool
-	select ARCH_HAS_DMA_MMAP_PGPROT
+	#
+	# MIPS allows mixing "slightly different" Cacheability and Coherency
+	# Attribute bits. It is believed that the uncached access through
+	# KSEG1 and the implementation specific "uncached accelerated" used
+	# by pgprot_writcombine can be mixed, and the latter sometimes provides
+	# significant advantages.
+	#
+	select ARCH_HAS_DMA_WRITE_COMBINE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_UNCACHED_SEGMENT
 	select NEED_DMA_MAP_STATE
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 1804dc9d8136..a01e14955187 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -682,5 +682,7 @@ const struct dma_map_ops jazz_dma_ops = {
 	.sync_sg_for_device = jazz_dma_sync_sg_for_device,
 	.dma_supported = dma_direct_supported,
 	.cache_sync = arch_dma_cache_sync,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 EXPORT_SYMBOL(jazz_dma_ops);
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index ed56c6fa7be2..1d4d57dd9acf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 	return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	if (attrs & DMA_ATTR_WRITE_COMBINE)
-		return pgprot_writecombine(prot);
-	return pgprot_noncached(prot);
-}
-
 static inline void dma_sync_virt(void *addr, size_t size,
 		enum dma_data_direction dir)
 {
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index 490e3720d694..4206d4b6c8ce 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
 }
-
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 2e757c785239..b16237c95ea3 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -52,7 +52,6 @@ config PARISC
 	select GENERIC_SCHED_CLOCK
 	select HAVE_UNSTABLE_SCHED_CLOCK if SMP
 	select GENERIC_CLOCKEVENTS
-	select ARCH_NO_COHERENT_DMA_MMAP
 	select CPU_NO_EFFICIENT_FFS
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a0879674a9c8..2f5a53874f6d 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -208,4 +208,6 @@ const struct dma_map_ops dma_iommu_ops = {
 	.sync_single_for_device = dma_iommu_sync_for_device,
 	.sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
 	.sync_sg_for_device = dma_iommu_sync_sg_for_device,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 98410119c47b..3542b7bd6a46 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -686,20 +686,16 @@ static int ps3_dma_supported(struct device *_dev, u64 mask)
 	return mask >= DMA_BIT_MASK(32);
 }
 
-static u64 ps3_dma_get_required_mask(struct device *_dev)
-{
-	return DMA_BIT_MASK(32);
-}
-
 static const struct dma_map_ops ps3_sb_dma_ops = {
 	.alloc = ps3_alloc_coherent,
 	.free = ps3_free_coherent,
 	.map_sg = ps3_sb_map_sg,
 	.unmap_sg = ps3_sb_unmap_sg,
 	.dma_supported = ps3_dma_supported,
-	.get_required_mask = ps3_dma_get_required_mask,
 	.map_page = ps3_sb_map_page,
 	.unmap_page = ps3_unmap_page,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 
 static const struct dma_map_ops ps3_ioc0_dma_ops = {
@@ -708,9 +704,10 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
 	.map_sg = ps3_ioc0_map_sg,
 	.unmap_sg = ps3_ioc0_unmap_sg,
 	.dma_supported = ps3_dma_supported,
-	.get_required_mask = ps3_dma_get_required_mask,
 	.map_page = ps3_ioc0_map_page,
 	.unmap_page = ps3_unmap_page,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 
 /**
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 6601b9d404dc..3473eef7628c 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
 	.unmap_page = vio_dma_iommu_unmap_page,
 	.dma_supported = dma_iommu_dma_supported,
 	.get_required_mask = dma_iommu_get_required_mask,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 };
 
 /**
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index fb2c7db0164e..64b1399a73f0 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -668,6 +668,8 @@ const struct dma_map_ops s390_pci_dma_ops = {
 	.unmap_sg = s390_dma_unmap_sg,
 	.map_page = s390_dma_map_pages,
 	.unmap_page = s390_dma_unmap_pages,
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
 	/* dma_supported is unconditionally true without a callback */
 };
 EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6b1b5941b618..f356ee674d89 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -5,7 +5,6 @@ config SUPERH
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select HAVE_PATA_PLATFORM
 	select CLKDEV_LOOKUP
 	select DMA_DECLARE_COHERENT
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 9492aa304f03..126e961a8cb0 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -198,8 +198,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
198 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) 198 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
199#define pgprot_writecombine(prot) \ 199#define pgprot_writecombine(prot) \
200 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) 200 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
201#define pgprot_dmacoherent(prot) \
202 __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
203 201
204#define pmd_none(pmd) (!pmd_val(pmd)) 202#define pmd_none(pmd) (!pmd_val(pmd))
205#define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT) 203#define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT)
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index 116777e7f387..63cd41b2e17a 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
21 free_pages((unsigned long) cpu_addr, get_order(size)); 21 free_pages((unsigned long) cpu_addr, get_order(size));
22} 22}
23 23
24static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
25 dma_addr_t dev_addr, unsigned long offset, size_t size,
26 enum dma_data_direction dir, unsigned long attrs) { }
27
28static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
29 size_t size, enum dma_data_direction dir,
30 unsigned long attrs) { }
31
32static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
33 dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
34
35static inline void xen_dma_sync_single_for_device(struct device *hwdev,
36 dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
37
38#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ 24#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index a585ea6f686a..a6ac3712db8b 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -677,7 +677,10 @@ static const struct dma_map_ops gart_dma_ops = {
677 .unmap_page = gart_unmap_page, 677 .unmap_page = gart_unmap_page,
678 .alloc = gart_alloc_coherent, 678 .alloc = gart_alloc_coherent,
679 .free = gart_free_coherent, 679 .free = gart_free_coherent,
680 .mmap = dma_common_mmap,
681 .get_sgtable = dma_common_get_sgtable,
680 .dma_supported = dma_direct_supported, 682 .dma_supported = dma_direct_supported,
683 .get_required_mask = dma_direct_get_required_mask,
681}; 684};
682 685
683static void gart_iommu_shutdown(void) 686static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 9d4343aa481b..23fdec030c37 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -468,6 +468,8 @@ static const struct dma_map_ops calgary_dma_ops = {
468 .map_page = calgary_map_page, 468 .map_page = calgary_map_page,
469 .unmap_page = calgary_unmap_page, 469 .unmap_page = calgary_unmap_page,
470 .dma_supported = dma_direct_supported, 470 .dma_supported = dma_direct_supported,
471 .mmap = dma_common_mmap,
472 .get_sgtable = dma_common_get_sgtable,
471}; 473};
472 474
473static inline void __iomem * busno_to_bbar(unsigned char num) 475static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 5f5302028a9a..c2cfa5e7c152 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -1,5 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* Glue code to lib/swiotlb.c */
3 2
4#include <linux/pci.h> 3#include <linux/pci.h>
5#include <linux/cache.h> 4#include <linux/cache.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbe35bf879f5..77ea96b794bd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -486,7 +486,7 @@ static int __init reserve_crashkernel_low(void)
486 ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); 486 ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
487 if (ret) { 487 if (ret) {
488 /* 488 /*
489 * two parts from lib/swiotlb.c: 489 * two parts from kernel/dma/swiotlb.c:
490 * -swiotlb size: user-specified with swiotlb= or default. 490 * -swiotlb size: user-specified with swiotlb= or default.
491 * 491 *
492 * -swiotlb overflow buffer: now hardcoded to 32k. We round it 492 * -swiotlb overflow buffer: now hardcoded to 32k. We round it
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 97bbc12dd6b2..6269a175385d 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -1,8 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0-only 1// SPDX-License-Identifier: GPL-2.0-only
2/* 2/*
3 * arch/x86/pci/sta2x11-fixup.c 3 * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping
4 * glue code for lib/swiotlb.c and DMA translation between STA2x11
5 * AMBA memory mapping and the X86 memory mapping
6 * 4 *
7 * ST Microelectronics ConneXt (STA2X11/STA2X10) 5 * ST Microelectronics ConneXt (STA2X11/STA2X10)
8 * 6 *
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 26e8b326966d..c8dbee62ec2a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
2625 *dma_handle = virt_to_machine(vstart).maddr; 2625 *dma_handle = virt_to_machine(vstart).maddr;
2626 return success ? 0 : -ENOMEM; 2626 return success ? 0 : -ENOMEM;
2627} 2627}
2628EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
2629 2628
2630void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) 2629void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
2631{ 2630{
@@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
2660 2659
2661 spin_unlock_irqrestore(&xen_reservation_lock, flags); 2660 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2662} 2661}
2663EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
2664 2662
2665static noinline void xen_flush_tlb_all(void) 2663static noinline void xen_flush_tlb_all(void)
2666{ 2664{
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index fb64469ca8f0..a8e7beb6b7b5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -5,7 +5,6 @@ config XTENSA
5 select ARCH_HAS_BINFMT_FLAT if !MMU 5 select ARCH_HAS_BINFMT_FLAT if !MMU
6 select ARCH_HAS_SYNC_DMA_FOR_CPU 6 select ARCH_HAS_SYNC_DMA_FOR_CPU
7 select ARCH_HAS_SYNC_DMA_FOR_DEVICE 7 select ARCH_HAS_SYNC_DMA_FOR_DEVICE
8 select ARCH_NO_COHERENT_DMA_MMAP if !MMU
9 select ARCH_USE_QUEUED_RWLOCKS 8 select ARCH_USE_QUEUED_RWLOCKS
10 select ARCH_USE_QUEUED_SPINLOCKS 9 select ARCH_USE_QUEUED_SPINLOCKS
11 select ARCH_WANT_FRAME_POINTERS 10 select ARCH_WANT_FRAME_POINTERS
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 65f05776d827..154979d62b73 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -167,7 +167,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
167 if (PageHighMem(page)) { 167 if (PageHighMem(page)) {
168 void *p; 168 void *p;
169 169
170 p = dma_common_contiguous_remap(page, size, VM_MAP, 170 p = dma_common_contiguous_remap(page, size,
171 pgprot_noncached(PAGE_KERNEL), 171 pgprot_noncached(PAGE_KERNEL),
172 __builtin_return_address(0)); 172 __builtin_return_address(0));
173 if (!p) { 173 if (!p) {
@@ -192,7 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
192 page = virt_to_page(platform_vaddr_to_cached(vaddr)); 192 page = virt_to_page(platform_vaddr_to_cached(vaddr));
193 } else { 193 } else {
194#ifdef CONFIG_MMU 194#ifdef CONFIG_MMU
195 dma_common_free_remap(vaddr, size, VM_MAP); 195 dma_common_free_remap(vaddr, size);
196#endif 196#endif
197 page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); 197 page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
198 } 198 }
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6bd1e3b082d8..5f6dcc7a47bd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -12,6 +12,7 @@
12#include <linux/lcm.h> 12#include <linux/lcm.h>
13#include <linux/jiffies.h> 13#include <linux/jiffies.h>
14#include <linux/gfp.h> 14#include <linux/gfp.h>
15#include <linux/dma-mapping.h>
15 16
16#include "blk.h" 17#include "blk.h"
17#include "blk-wbt.h" 18#include "blk-wbt.h"
@@ -848,6 +849,28 @@ void blk_queue_required_elevator_features(struct request_queue *q,
848} 849}
849EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features); 850EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);
850 851
852/**
853 * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
854 * @q: the request queue for the device
855 * @dev: the device pointer for dma
856 *
 857 * Tell the block layer that the DMA mapping of @q can merge segments.
858 */
859bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
860 struct device *dev)
861{
862 unsigned long boundary = dma_get_merge_boundary(dev);
863
864 if (!boundary)
865 return false;
866
867 /* No need to update max_segment_size. see blk_queue_virt_boundary() */
868 blk_queue_virt_boundary(q, boundary);
869
870 return true;
871}
872EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
873
851static int __init blk_settings_init(void) 874static int __init blk_settings_init(void)
852{ 875{
853 blk_max_low_pfn = max_low_pfn - 1; 876 blk_max_low_pfn = max_low_pfn - 1;
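The helper above is meant to be called by a block driver whose device sits behind an IOMMU that can merge segments; the in-tree caller added by this series is the MMC queue code further down. A minimal sketch of the calling convention, with the driver-side names (foo_setup_queue) purely illustrative:

    #include <linux/blkdev.h>
    #include <linux/device.h>
    #include <linux/dma-mapping.h>

    /* Hypothetical queue setup for a block driver "foo". */
    static void foo_setup_queue(struct request_queue *q, struct device *dev)
    {
        /*
         * Returns false when dma_get_merge_boundary() reports 0, i.e. the
         * DMA layer cannot merge; on success it installs the boundary via
         * blk_queue_virt_boundary(), so the driver should not also shrink
         * max_segment_size afterwards.
         */
        if (!blk_queue_can_use_dma_map_merging(q, dev))
            dev_info(dev, "DMA map merging not available\n");
    }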
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 785c5546067a..ed0ccbeed70f 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -4609,11 +4609,10 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc)
4609 i734_buf.size = i734.ovli.width * i734.ovli.height * 4609 i734_buf.size = i734.ovli.width * i734.ovli.height *
4610 color_mode_to_bpp(i734.ovli.fourcc) / 8; 4610 color_mode_to_bpp(i734.ovli.fourcc) / 8;
4611 4611
4612 i734_buf.vaddr = dma_alloc_writecombine(&dispc->pdev->dev, 4612 i734_buf.vaddr = dma_alloc_wc(&dispc->pdev->dev, i734_buf.size,
4613 i734_buf.size, &i734_buf.paddr, 4613 &i734_buf.paddr, GFP_KERNEL);
4614 GFP_KERNEL);
4615 if (!i734_buf.vaddr) { 4614 if (!i734_buf.vaddr) {
4616 dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n", 4615 dev_err(&dispc->pdev->dev, "%s: dma_alloc_wc failed\n",
4617 __func__); 4616 __func__);
4618 return -ENOMEM; 4617 return -ENOMEM;
4619 } 4618 }
@@ -4626,8 +4625,8 @@ static void dispc_errata_i734_wa_fini(struct dispc_device *dispc)
4626 if (!dispc->feat->has_gamma_i734_bug) 4625 if (!dispc->feat->has_gamma_i734_bug)
4627 return; 4626 return;
4628 4627
4629 dma_free_writecombine(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr, 4628 dma_free_wc(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr,
4630 i734_buf.paddr); 4629 i734_buf.paddr);
4631} 4630}
4632 4631
4633static void dispc_errata_i734_wa(struct dispc_device *dispc) 4632static void dispc_errata_i734_wa(struct dispc_device *dispc)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1ed3b98324ba..97975bb7f347 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2754,6 +2754,8 @@ static const struct dma_map_ops amd_iommu_dma_ops = {
2754 .map_sg = map_sg, 2754 .map_sg = map_sg,
2755 .unmap_sg = unmap_sg, 2755 .unmap_sg = unmap_sg,
2756 .dma_supported = amd_iommu_dma_supported, 2756 .dma_supported = amd_iommu_dma_supported,
2757 .mmap = dma_common_mmap,
2758 .get_sgtable = dma_common_get_sgtable,
2757}; 2759};
2758 2760
2759static int init_reserved_iova_ranges(void) 2761static int init_reserved_iova_ranges(void)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8f412af84247..f321279baf9e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -548,15 +548,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
548 return pages; 548 return pages;
549} 549}
550 550
551static struct page **__iommu_dma_get_pages(void *cpu_addr)
552{
553 struct vm_struct *area = find_vm_area(cpu_addr);
554
555 if (!area || !area->pages)
556 return NULL;
557 return area->pages;
558}
559
560/** 551/**
561 * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space 552 * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
562 * @dev: Device to allocate memory for. Must be a real device 553 * @dev: Device to allocate memory for. Must be a real device
@@ -624,7 +615,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
624 < size) 615 < size)
625 goto out_free_sg; 616 goto out_free_sg;
626 617
627 vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, 618 vaddr = dma_common_pages_remap(pages, size, prot,
628 __builtin_return_address(0)); 619 __builtin_return_address(0));
629 if (!vaddr) 620 if (!vaddr)
630 goto out_unmap; 621 goto out_unmap;
@@ -945,10 +936,10 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
 945 * If the address is remapped, then it's either non-coherent 936 * If the address is remapped, then it's either non-coherent
946 * or highmem CMA, or an iommu_dma_alloc_remap() construction. 937 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
947 */ 938 */
948 pages = __iommu_dma_get_pages(cpu_addr); 939 pages = dma_common_find_pages(cpu_addr);
949 if (!pages) 940 if (!pages)
950 page = vmalloc_to_page(cpu_addr); 941 page = vmalloc_to_page(cpu_addr);
951 dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP); 942 dma_common_free_remap(cpu_addr, alloc_size);
952 } else { 943 } else {
953 /* Lowmem means a coherent atomic or CMA allocation */ 944 /* Lowmem means a coherent atomic or CMA allocation */
954 page = virt_to_page(cpu_addr); 945 page = virt_to_page(cpu_addr);
@@ -986,7 +977,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
986 pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); 977 pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
987 978
988 cpu_addr = dma_common_contiguous_remap(page, alloc_size, 979 cpu_addr = dma_common_contiguous_remap(page, alloc_size,
989 VM_USERMAP, prot, __builtin_return_address(0)); 980 prot, __builtin_return_address(0));
990 if (!cpu_addr) 981 if (!cpu_addr)
991 goto out_free_pages; 982 goto out_free_pages;
992 983
@@ -1052,7 +1043,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
1052 return -ENXIO; 1043 return -ENXIO;
1053 1044
1054 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { 1045 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1055 struct page **pages = __iommu_dma_get_pages(cpu_addr); 1046 struct page **pages = dma_common_find_pages(cpu_addr);
1056 1047
1057 if (pages) 1048 if (pages)
1058 return __iommu_dma_mmap(pages, size, vma); 1049 return __iommu_dma_mmap(pages, size, vma);
@@ -1074,7 +1065,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1074 int ret; 1065 int ret;
1075 1066
1076 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { 1067 if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
1077 struct page **pages = __iommu_dma_get_pages(cpu_addr); 1068 struct page **pages = dma_common_find_pages(cpu_addr);
1078 1069
1079 if (pages) { 1070 if (pages) {
1080 return sg_alloc_table_from_pages(sgt, pages, 1071 return sg_alloc_table_from_pages(sgt, pages,
@@ -1093,6 +1084,13 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1093 return ret; 1084 return ret;
1094} 1085}
1095 1086
1087static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
1088{
1089 struct iommu_domain *domain = iommu_get_dma_domain(dev);
1090
1091 return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
1092}
1093
1096static const struct dma_map_ops iommu_dma_ops = { 1094static const struct dma_map_ops iommu_dma_ops = {
1097 .alloc = iommu_dma_alloc, 1095 .alloc = iommu_dma_alloc,
1098 .free = iommu_dma_free, 1096 .free = iommu_dma_free,
@@ -1108,6 +1106,7 @@ static const struct dma_map_ops iommu_dma_ops = {
1108 .sync_sg_for_device = iommu_dma_sync_sg_for_device, 1106 .sync_sg_for_device = iommu_dma_sync_sg_for_device,
1109 .map_resource = iommu_dma_map_resource, 1107 .map_resource = iommu_dma_map_resource,
1110 .unmap_resource = iommu_dma_unmap_resource, 1108 .unmap_resource = iommu_dma_unmap_resource,
1109 .get_merge_boundary = iommu_dma_get_merge_boundary,
1111}; 1110};
1112 1111
1113/* 1112/*
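A worked example of the merge-boundary computation added above, with an assumed page-size bitmap for illustration:

    pgsize_bitmap        = 0x40201000   /* 4K, 2M and 1G pages supported */
    __ffs(pgsize_bitmap) = 12
    (1UL << 12) - 1      = 0xfff

In other words the merge boundary is the mask of the smallest IOMMU page size, which blk_queue_can_use_dma_map_merging() then installs as the queue's virt_boundary so that segments which become contiguous in IOVA space after IOMMU mapping can be merged.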
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 87de0b975672..3f974919d3bd 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3785,6 +3785,8 @@ static const struct dma_map_ops intel_dma_ops = {
3785 .map_resource = intel_map_resource, 3785 .map_resource = intel_map_resource,
3786 .unmap_resource = intel_unmap_resource, 3786 .unmap_resource = intel_unmap_resource,
3787 .dma_supported = dma_direct_supported, 3787 .dma_supported = dma_direct_supported,
3788 .mmap = dma_common_mmap,
3789 .get_sgtable = dma_common_get_sgtable,
3788}; 3790};
3789 3791
3790static void 3792static void
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 7102e2ebc614..9edc08685e86 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -21,6 +21,8 @@
21#include "card.h" 21#include "card.h"
22#include "host.h" 22#include "host.h"
23 23
24#define MMC_DMA_MAP_MERGE_SEGMENTS 512
25
24static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) 26static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
25{ 27{
26 /* Allow only 1 DCMD at a time */ 28 /* Allow only 1 DCMD at a time */
@@ -193,6 +195,12 @@ static void mmc_queue_setup_discard(struct request_queue *q,
193 blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); 195 blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
194} 196}
195 197
198static unsigned int mmc_get_max_segments(struct mmc_host *host)
199{
200 return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS :
201 host->max_segs;
202}
203
196/** 204/**
197 * mmc_init_request() - initialize the MMC-specific per-request data 205 * mmc_init_request() - initialize the MMC-specific per-request data
198 * @q: the request queue 206 * @q: the request queue
@@ -206,7 +214,7 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req,
206 struct mmc_card *card = mq->card; 214 struct mmc_card *card = mq->card;
207 struct mmc_host *host = card->host; 215 struct mmc_host *host = card->host;
208 216
209 mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); 217 mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp);
210 if (!mq_rq->sg) 218 if (!mq_rq->sg)
211 return -ENOMEM; 219 return -ENOMEM;
212 220
@@ -362,13 +370,23 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
362 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); 370 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
363 blk_queue_max_hw_sectors(mq->queue, 371 blk_queue_max_hw_sectors(mq->queue,
364 min(host->max_blk_count, host->max_req_size / 512)); 372 min(host->max_blk_count, host->max_req_size / 512));
365 blk_queue_max_segments(mq->queue, host->max_segs); 373 if (host->can_dma_map_merge)
374 WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
375 mmc_dev(host)),
376 "merging was advertised but not possible");
377 blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
366 378
367 if (mmc_card_mmc(card)) 379 if (mmc_card_mmc(card))
368 block_size = card->ext_csd.data_sector_size; 380 block_size = card->ext_csd.data_sector_size;
369 381
370 blk_queue_logical_block_size(mq->queue, block_size); 382 blk_queue_logical_block_size(mq->queue, block_size);
371 blk_queue_max_segment_size(mq->queue, 383 /*
 384 * When blk_queue_can_use_dma_map_merging() succeeds, it calls
 385 * blk_queue_virt_boundary() internally, so the MMC core must not
 386 * also call blk_queue_max_segment_size().
387 */
388 if (!host->can_dma_map_merge)
389 blk_queue_max_segment_size(mq->queue,
372 round_down(host->max_seg_size, block_size)); 390 round_down(host->max_seg_size, block_size));
373 391
374 dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); 392 dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
@@ -381,6 +399,11 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
381 init_waitqueue_head(&mq->wait); 399 init_waitqueue_head(&mq->wait);
382} 400}
383 401
402static inline bool mmc_merge_capable(struct mmc_host *host)
403{
404 return host->caps2 & MMC_CAP2_MERGE_CAPABLE;
405}
406
384/* Set queue depth to get a reasonable value for q->nr_requests */ 407/* Set queue depth to get a reasonable value for q->nr_requests */
385#define MMC_QUEUE_DEPTH 64 408#define MMC_QUEUE_DEPTH 64
386 409
@@ -418,6 +441,18 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
418 mq->tag_set.cmd_size = sizeof(struct mmc_queue_req); 441 mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
419 mq->tag_set.driver_data = mq; 442 mq->tag_set.driver_data = mq;
420 443
444 /*
445 * Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops,
446 * the host->can_dma_map_merge should be set before to get max_segs
447 * from mmc_get_max_segments().
448 */
449 if (mmc_merge_capable(host) &&
450 host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
451 dma_get_merge_boundary(mmc_dev(host)))
452 host->can_dma_map_merge = 1;
453 else
454 host->can_dma_map_merge = 0;
455
421 ret = blk_mq_alloc_tag_set(&mq->tag_set); 456 ret = blk_mq_alloc_tag_set(&mq->tag_set);
422 if (ret) 457 if (ret)
423 return ret; 458 return ret;
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 751fe91c7571..a66f8d6d61d1 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -106,7 +106,7 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = {
106 TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2, 106 TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
107 .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | 107 .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
108 MMC_CAP_CMD23, 108 MMC_CAP_CMD23,
109 .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT, 109 .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT | MMC_CAP2_MERGE_CAPABLE,
110 .bus_shift = 2, 110 .bus_shift = 2,
111 .scc_offset = 0x1000, 111 .scc_offset = 0x1000,
112 .taps = rcar_gen3_scc_taps, 112 .taps = rcar_gen3_scc_taps,
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 217f15aafa4a..ad290f79983b 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1024,6 +1024,7 @@ static const struct dma_map_ops ccio_ops = {
1024 .unmap_page = ccio_unmap_page, 1024 .unmap_page = ccio_unmap_page,
1025 .map_sg = ccio_map_sg, 1025 .map_sg = ccio_map_sg,
1026 .unmap_sg = ccio_unmap_sg, 1026 .unmap_sg = ccio_unmap_sg,
1027 .get_sgtable = dma_common_get_sgtable,
1027}; 1028};
1028 1029
1029#ifdef CONFIG_PROC_FS 1030#ifdef CONFIG_PROC_FS
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 296668caf7e5..ed50502cc65a 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1084,6 +1084,7 @@ static const struct dma_map_ops sba_ops = {
1084 .unmap_page = sba_unmap_page, 1084 .unmap_page = sba_unmap_page,
1085 .map_sg = sba_map_sg, 1085 .map_sg = sba_map_sg,
1086 .unmap_sg = sba_unmap_sg, 1086 .unmap_sg = sba_unmap_sg,
1087 .get_sgtable = dma_common_get_sgtable,
1087}; 1088};
1088 1089
1089 1090
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 28ed306982f7..94afdde4bc9f 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -2,7 +2,7 @@
2menu "Remoteproc drivers" 2menu "Remoteproc drivers"
3 3
4config REMOTEPROC 4config REMOTEPROC
5 tristate "Support for Remote Processor subsystem" 5 bool "Support for Remote Processor subsystem"
6 depends on HAS_DMA 6 depends on HAS_DMA
7 select CRC32 7 select CRC32
8 select FW_LOADER 8 select FW_LOADER
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index adcabd9473eb..58c9365fa217 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -28,6 +28,7 @@
28 28
29#include <linux/memblock.h> 29#include <linux/memblock.h>
30#include <linux/dma-direct.h> 30#include <linux/dma-direct.h>
31#include <linux/dma-noncoherent.h>
31#include <linux/export.h> 32#include <linux/export.h>
32#include <xen/swiotlb-xen.h> 33#include <xen/swiotlb-xen.h>
33#include <xen/page.h> 34#include <xen/page.h>
@@ -391,6 +392,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
391 if (map == (phys_addr_t)DMA_MAPPING_ERROR) 392 if (map == (phys_addr_t)DMA_MAPPING_ERROR)
392 return DMA_MAPPING_ERROR; 393 return DMA_MAPPING_ERROR;
393 394
395 phys = map;
394 dev_addr = xen_phys_to_bus(map); 396 dev_addr = xen_phys_to_bus(map);
395 397
396 /* 398 /*
@@ -402,14 +404,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
402 return DMA_MAPPING_ERROR; 404 return DMA_MAPPING_ERROR;
403 } 405 }
404 406
405 page = pfn_to_page(map >> PAGE_SHIFT);
406 offset = map & ~PAGE_MASK;
407done: 407done:
408 /* 408 if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
409 * we are not interested in the dma_addr returned by xen_dma_map_page, 409 xen_dma_sync_for_device(dev, dev_addr, phys, size, dir);
410 * only in the potential cache flushes executed by the function.
411 */
412 xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
413 return dev_addr; 410 return dev_addr;
414} 411}
415 412
@@ -421,35 +418,29 @@ done:
421 * After this call, reads by the cpu to the buffer are guaranteed to see 418 * After this call, reads by the cpu to the buffer are guaranteed to see
422 * whatever the device wrote there. 419 * whatever the device wrote there.
423 */ 420 */
424static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, 421static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
425 size_t size, enum dma_data_direction dir, 422 size_t size, enum dma_data_direction dir, unsigned long attrs)
426 unsigned long attrs)
427{ 423{
428 phys_addr_t paddr = xen_bus_to_phys(dev_addr); 424 phys_addr_t paddr = xen_bus_to_phys(dev_addr);
429 425
430 BUG_ON(dir == DMA_NONE); 426 BUG_ON(dir == DMA_NONE);
431 427
432 xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); 428 if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
429 xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir);
433 430
434 /* NOTE: We use dev_addr here, not paddr! */ 431 /* NOTE: We use dev_addr here, not paddr! */
435 if (is_xen_swiotlb_buffer(dev_addr)) 432 if (is_xen_swiotlb_buffer(dev_addr))
436 swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs); 433 swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
437} 434}
438 435
439static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
440 size_t size, enum dma_data_direction dir,
441 unsigned long attrs)
442{
443 xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
444}
445
446static void 436static void
447xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, 437xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
448 size_t size, enum dma_data_direction dir) 438 size_t size, enum dma_data_direction dir)
449{ 439{
450 phys_addr_t paddr = xen_bus_to_phys(dma_addr); 440 phys_addr_t paddr = xen_bus_to_phys(dma_addr);
451 441
452 xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir); 442 if (!dev_is_dma_coherent(dev))
443 xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir);
453 444
454 if (is_xen_swiotlb_buffer(dma_addr)) 445 if (is_xen_swiotlb_buffer(dma_addr))
455 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); 446 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
@@ -464,7 +455,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
464 if (is_xen_swiotlb_buffer(dma_addr)) 455 if (is_xen_swiotlb_buffer(dma_addr))
465 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); 456 swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
466 457
467 xen_dma_sync_single_for_device(dev, dma_addr, size, dir); 458 if (!dev_is_dma_coherent(dev))
459 xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir);
468} 460}
469 461
470/* 462/*
@@ -481,7 +473,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
481 BUG_ON(dir == DMA_NONE); 473 BUG_ON(dir == DMA_NONE);
482 474
483 for_each_sg(sgl, sg, nelems, i) 475 for_each_sg(sgl, sg, nelems, i)
484 xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); 476 xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
477 dir, attrs);
485 478
486} 479}
487 480
@@ -547,51 +540,6 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
547 return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; 540 return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
548} 541}
549 542
550/*
551 * Create userspace mapping for the DMA-coherent memory.
552 * This function should be called with the pages from the current domain only,
553 * passing pages mapped from other domains would lead to memory corruption.
554 */
555static int
556xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
557 void *cpu_addr, dma_addr_t dma_addr, size_t size,
558 unsigned long attrs)
559{
560#ifdef CONFIG_ARM
561 if (xen_get_dma_ops(dev)->mmap)
562 return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr,
563 dma_addr, size, attrs);
564#endif
565 return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
566}
567
568/*
569 * This function should be called with the pages from the current domain only,
570 * passing pages mapped from other domains would lead to memory corruption.
571 */
572static int
573xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
574 void *cpu_addr, dma_addr_t handle, size_t size,
575 unsigned long attrs)
576{
577#ifdef CONFIG_ARM
578 if (xen_get_dma_ops(dev)->get_sgtable) {
579#if 0
580 /*
581 * This check verifies that the page belongs to the current domain and
582 * is not one mapped from another domain.
583 * This check is for debug only, and should not go to production build
584 */
585 unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
586 BUG_ON (!page_is_ram(bfn));
587#endif
588 return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
589 handle, size, attrs);
590 }
591#endif
592 return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
593}
594
595const struct dma_map_ops xen_swiotlb_dma_ops = { 543const struct dma_map_ops xen_swiotlb_dma_ops = {
596 .alloc = xen_swiotlb_alloc_coherent, 544 .alloc = xen_swiotlb_alloc_coherent,
597 .free = xen_swiotlb_free_coherent, 545 .free = xen_swiotlb_free_coherent,
@@ -604,6 +552,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
604 .map_page = xen_swiotlb_map_page, 552 .map_page = xen_swiotlb_map_page,
605 .unmap_page = xen_swiotlb_unmap_page, 553 .unmap_page = xen_swiotlb_unmap_page,
606 .dma_supported = xen_swiotlb_dma_supported, 554 .dma_supported = xen_swiotlb_dma_supported,
607 .mmap = xen_swiotlb_dma_mmap, 555 .mmap = dma_common_mmap,
608 .get_sgtable = xen_swiotlb_get_sgtable, 556 .get_sgtable = dma_common_get_sgtable,
609}; 557};
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3094f2d513b2..d9db32fb75ee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1110,6 +1110,8 @@ extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
1110extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); 1110extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
1111extern void blk_queue_required_elevator_features(struct request_queue *q, 1111extern void blk_queue_required_elevator_features(struct request_queue *q,
1112 unsigned int features); 1112 unsigned int features);
1113extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
1114 struct device *dev);
1113 1115
1114/* 1116/*
1115 * Number of physical segments as sent to the device. 1117 * Number of physical segments as sent to the device.
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 14702e2d6fa8..4a1c4fca475a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
131 int (*dma_supported)(struct device *dev, u64 mask); 131 int (*dma_supported)(struct device *dev, u64 mask);
132 u64 (*get_required_mask)(struct device *dev); 132 u64 (*get_required_mask)(struct device *dev);
133 size_t (*max_mapping_size)(struct device *dev); 133 size_t (*max_mapping_size)(struct device *dev);
134 unsigned long (*get_merge_boundary)(struct device *dev);
134}; 135};
135 136
136#define DMA_MAPPING_ERROR (~(dma_addr_t)0) 137#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
@@ -457,11 +458,13 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
457int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 458int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
458 void *cpu_addr, dma_addr_t dma_addr, size_t size, 459 void *cpu_addr, dma_addr_t dma_addr, size_t size,
459 unsigned long attrs); 460 unsigned long attrs);
461bool dma_can_mmap(struct device *dev);
460int dma_supported(struct device *dev, u64 mask); 462int dma_supported(struct device *dev, u64 mask);
461int dma_set_mask(struct device *dev, u64 mask); 463int dma_set_mask(struct device *dev, u64 mask);
462int dma_set_coherent_mask(struct device *dev, u64 mask); 464int dma_set_coherent_mask(struct device *dev, u64 mask);
463u64 dma_get_required_mask(struct device *dev); 465u64 dma_get_required_mask(struct device *dev);
464size_t dma_max_mapping_size(struct device *dev); 466size_t dma_max_mapping_size(struct device *dev);
467unsigned long dma_get_merge_boundary(struct device *dev);
465#else /* CONFIG_HAS_DMA */ 468#else /* CONFIG_HAS_DMA */
466static inline dma_addr_t dma_map_page_attrs(struct device *dev, 469static inline dma_addr_t dma_map_page_attrs(struct device *dev,
467 struct page *page, size_t offset, size_t size, 470 struct page *page, size_t offset, size_t size,
@@ -547,6 +550,10 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
547{ 550{
548 return -ENXIO; 551 return -ENXIO;
549} 552}
553static inline bool dma_can_mmap(struct device *dev)
554{
555 return false;
556}
550static inline int dma_supported(struct device *dev, u64 mask) 557static inline int dma_supported(struct device *dev, u64 mask)
551{ 558{
552 return 0; 559 return 0;
@@ -567,6 +574,10 @@ static inline size_t dma_max_mapping_size(struct device *dev)
567{ 574{
568 return 0; 575 return 0;
569} 576}
577static inline unsigned long dma_get_merge_boundary(struct device *dev)
578{
579 return 0;
580}
570#endif /* CONFIG_HAS_DMA */ 581#endif /* CONFIG_HAS_DMA */
571 582
572static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, 583static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
@@ -610,16 +621,14 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
610 void *cpu_addr, dma_addr_t dma_addr, size_t size, 621 void *cpu_addr, dma_addr_t dma_addr, size_t size,
611 unsigned long attrs); 622 unsigned long attrs);
612 623
624struct page **dma_common_find_pages(void *cpu_addr);
613void *dma_common_contiguous_remap(struct page *page, size_t size, 625void *dma_common_contiguous_remap(struct page *page, size_t size,
614 unsigned long vm_flags,
615 pgprot_t prot, const void *caller); 626 pgprot_t prot, const void *caller);
616 627
617void *dma_common_pages_remap(struct page **pages, size_t size, 628void *dma_common_pages_remap(struct page **pages, size_t size,
618 unsigned long vm_flags, pgprot_t prot, 629 pgprot_t prot, const void *caller);
619 const void *caller); 630void dma_common_free_remap(void *cpu_addr, size_t size);
620void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
621 631
622int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
623bool dma_in_atomic_pool(void *start, size_t size); 632bool dma_in_atomic_pool(void *start, size_t size);
624void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); 633void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
625bool dma_free_from_pool(void *start, size_t size); 634bool dma_free_from_pool(void *start, size_t size);
@@ -749,7 +758,6 @@ static inline int dma_get_cache_alignment(void)
749#ifdef CONFIG_DMA_DECLARE_COHERENT 758#ifdef CONFIG_DMA_DECLARE_COHERENT
750int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, 759int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
751 dma_addr_t device_addr, size_t size); 760 dma_addr_t device_addr, size_t size);
752void dma_release_declared_memory(struct device *dev);
753#else 761#else
754static inline int 762static inline int
755dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, 763dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
@@ -757,11 +765,6 @@ dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
757{ 765{
758 return -ENOSYS; 766 return -ENOSYS;
759} 767}
760
761static inline void
762dma_release_declared_memory(struct device *dev)
763{
764}
765#endif /* CONFIG_DMA_DECLARE_COHERENT */ 768#endif /* CONFIG_DMA_DECLARE_COHERENT */
766 769
767static inline void *dmam_alloc_coherent(struct device *dev, size_t size, 770static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
@@ -781,9 +784,6 @@ static inline void *dma_alloc_wc(struct device *dev, size_t size,
781 784
782 return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); 785 return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs);
783} 786}
784#ifndef dma_alloc_writecombine
785#define dma_alloc_writecombine dma_alloc_wc
786#endif
787 787
788static inline void dma_free_wc(struct device *dev, size_t size, 788static inline void dma_free_wc(struct device *dev, size_t size,
789 void *cpu_addr, dma_addr_t dma_addr) 789 void *cpu_addr, dma_addr_t dma_addr)
@@ -791,9 +791,6 @@ static inline void dma_free_wc(struct device *dev, size_t size,
791 return dma_free_attrs(dev, size, cpu_addr, dma_addr, 791 return dma_free_attrs(dev, size, cpu_addr, dma_addr,
792 DMA_ATTR_WRITE_COMBINE); 792 DMA_ATTR_WRITE_COMBINE);
793} 793}
794#ifndef dma_free_writecombine
795#define dma_free_writecombine dma_free_wc
796#endif
797 794
798static inline int dma_mmap_wc(struct device *dev, 795static inline int dma_mmap_wc(struct device *dev,
799 struct vm_area_struct *vma, 796 struct vm_area_struct *vma,
@@ -803,9 +800,6 @@ static inline int dma_mmap_wc(struct device *dev,
803 return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, 800 return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size,
804 DMA_ATTR_WRITE_COMBINE); 801 DMA_ATTR_WRITE_COMBINE);
805} 802}
806#ifndef dma_mmap_writecombine
807#define dma_mmap_writecombine dma_mmap_wc
808#endif
809 803
810#ifdef CONFIG_NEED_DMA_MAP_STATE 804#ifdef CONFIG_NEED_DMA_MAP_STATE
811#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME 805#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
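The new dma_can_mmap() helper lets a driver find out up front whether dma_mmap_attrs() (and hence dma_mmap_coherent()) can work for its device, instead of discovering -ENXIO at mmap time. A minimal, hypothetical usage sketch; the function name foo_mmap_dma_buffer is illustrative only:

    #include <linux/dma-mapping.h>

    /* Hypothetical mmap path in a driver "foo". */
    static int foo_mmap_dma_buffer(struct device *dev, struct vm_area_struct *vma,
                                   void *cpu_addr, dma_addr_t dma_addr, size_t size)
    {
        if (!dma_can_mmap(dev))
            return -ENXIO;  /* e.g. fall back to copying via read/write */

        return dma_mmap_coherent(dev, vma, cpu_addr, dma_addr, size);
    }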
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 0bff3d7fac92..dd3de6d88fc0 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -3,6 +3,7 @@
3#define _LINUX_DMA_NONCOHERENT_H 1 3#define _LINUX_DMA_NONCOHERENT_H 1
4 4
5#include <linux/dma-mapping.h> 5#include <linux/dma-mapping.h>
6#include <asm/pgtable.h>
6 7
7#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H 8#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
8#include <asm/dma-coherence.h> 9#include <asm/dma-coherence.h>
@@ -42,10 +43,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
42 dma_addr_t dma_addr, unsigned long attrs); 43 dma_addr_t dma_addr, unsigned long attrs);
43long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, 44long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
44 dma_addr_t dma_addr); 45 dma_addr_t dma_addr);
45pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
46 unsigned long attrs);
47 46
48#ifdef CONFIG_MMU 47#ifdef CONFIG_MMU
48/*
49 * Page protection so that devices that can't snoop CPU caches can use the
50 * memory coherently. We default to pgprot_noncached which is usually used
51 * for ioremap as a safe bet, but architectures can override this with less
52 * strict semantics if possible.
53 */
54#ifndef pgprot_dmacoherent
55#define pgprot_dmacoherent(prot) pgprot_noncached(prot)
56#endif
57
49pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); 58pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
50#else 59#else
51static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, 60static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
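Architectures whose hardware allows a weaker attribute than fully uncached memory for coherent DMA buffers can override the pgprot_dmacoherent() default introduced above from their asm/pgtable.h. A hedged sketch of what such an override can look like, modelled loosely on the arm64 change in this series; the macro names below are illustrative and not a definitive copy of any architecture's header:

    /* arch/<arch>/include/asm/pgtable.h -- illustrative only */
    #define pgprot_dmacoherent(prot) \
        __pgprot_modify(prot, PTE_ATTRINDX_MASK, \
                        PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)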
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4704b77259ee..ba703384bea0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -368,6 +368,7 @@ struct mmc_host {
368#define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */ 368#define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */
369#define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */ 369#define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */
370#define MMC_CAP2_AVOID_3_3V (1 << 25) /* Host must negotiate down from 3.3V */ 370#define MMC_CAP2_AVOID_3_3V (1 << 25) /* Host must negotiate down from 3.3V */
371#define MMC_CAP2_MERGE_CAPABLE (1 << 26) /* Host can merge a segment over the segment size */
371 372
372 int fixed_drv_type; /* fixed driver type for non-removable media */ 373 int fixed_drv_type; /* fixed driver type for non-removable media */
373 374
@@ -397,6 +398,7 @@ struct mmc_host {
397 unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ 398 unsigned int retune_paused:1; /* re-tuning is temporarily disabled */
398 unsigned int use_blk_mq:1; /* use blk-mq */ 399 unsigned int use_blk_mq:1; /* use blk-mq */
399 unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ 400 unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */
401 unsigned int can_dma_map_merge:1; /* merging can be used */
400 402
401 int rescan_disable; /* disable card detection */ 403 int rescan_disable; /* disable card detection */
402 int rescan_entered; /* used with nonremovable devices */ 404 int rescan_entered; /* used with nonremovable devices */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 9b21d0047710..dfa718ffdd4f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -18,6 +18,7 @@ struct notifier_block; /* in notifier.h */
18#define VM_ALLOC 0x00000002 /* vmalloc() */ 18#define VM_ALLOC 0x00000002 /* vmalloc() */
19#define VM_MAP 0x00000004 /* vmap()ed pages */ 19#define VM_MAP 0x00000004 /* vmap()ed pages */
20#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ 20#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
21#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
21#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ 22#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
22#define VM_NO_GUARD 0x00000040 /* don't add guard page */ 23#define VM_NO_GUARD 0x00000040 /* don't add guard page */
23#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ 24#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
@@ -26,6 +27,7 @@ struct notifier_block; /* in notifier.h */
26 * vfree_atomic(). 27 * vfree_atomic().
27 */ 28 */
28#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ 29#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */
30
29/* bits [20..32] reserved for arch specific ioremap internals */ 31/* bits [20..32] reserved for arch specific ioremap internals */
30 32
31/* 33/*
diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h
index 2982571f7cc1..43ef24dd030e 100644
--- a/include/xen/arm/hypervisor.h
+++ b/include/xen/arm/hypervisor.h
@@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
19 return PARAVIRT_LAZY_NONE; 19 return PARAVIRT_LAZY_NONE;
20} 20}
21 21
22extern const struct dma_map_ops *xen_dma_ops;
23
24#ifdef CONFIG_XEN 22#ifdef CONFIG_XEN
25void __init xen_early_init(void); 23void __init xen_early_init(void);
26#else 24#else
diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
index 2ca9164a79bf..b9cc11e887ed 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/arm/page-coherent.h
@@ -2,15 +2,19 @@
2#ifndef _XEN_ARM_PAGE_COHERENT_H 2#ifndef _XEN_ARM_PAGE_COHERENT_H
3#define _XEN_ARM_PAGE_COHERENT_H 3#define _XEN_ARM_PAGE_COHERENT_H
4 4
5void __xen_dma_map_page(struct device *hwdev, struct page *page, 5#include <linux/dma-mapping.h>
6 dma_addr_t dev_addr, unsigned long offset, size_t size, 6#include <asm/page.h>
7 enum dma_data_direction dir, unsigned long attrs); 7
8void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, 8static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
9 size_t size, enum dma_data_direction dir, 9 dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
10 unsigned long attrs); 10{
11void __xen_dma_sync_single_for_cpu(struct device *hwdev, 11 return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
12 dma_addr_t handle, size_t size, enum dma_data_direction dir); 12}
13void __xen_dma_sync_single_for_device(struct device *hwdev, 13
14 dma_addr_t handle, size_t size, enum dma_data_direction dir); 14static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
15 void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
16{
17 dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
18}
15 19
16#endif /* _XEN_ARM_PAGE_COHERENT_H */ 20#endif /* _XEN_ARM_PAGE_COHERENT_H */
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index 5e4b83f83dbc..d71380f6ed0b 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -4,6 +4,11 @@
4 4
5#include <linux/swiotlb.h> 5#include <linux/swiotlb.h>
6 6
7void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
8 phys_addr_t paddr, size_t size, enum dma_data_direction dir);
9void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
10 phys_addr_t paddr, size_t size, enum dma_data_direction dir);
11
7extern int xen_swiotlb_init(int verbose, bool early); 12extern int xen_swiotlb_init(int verbose, bool early);
8extern const struct dma_map_ops xen_swiotlb_dma_ops; 13extern const struct dma_map_ops xen_swiotlb_dma_ops;
9 14
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9decbba255fc..73c5c2b8e824 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -20,6 +20,15 @@ config ARCH_HAS_DMA_COHERENCE_H
20config ARCH_HAS_DMA_SET_MASK 20config ARCH_HAS_DMA_SET_MASK
21 bool 21 bool
22 22
23#
24# Select this option if the architecture needs special handling for
25# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what
 26# people think of when saying write combine, so very few platforms should
27# need to enable this.
28#
29config ARCH_HAS_DMA_WRITE_COMBINE
30 bool
31
23config DMA_DECLARE_COHERENT 32config DMA_DECLARE_COHERENT
24 bool 33 bool
25 34
@@ -45,9 +54,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
45config ARCH_HAS_DMA_COHERENT_TO_PFN 54config ARCH_HAS_DMA_COHERENT_TO_PFN
46 bool 55 bool
47 56
48config ARCH_HAS_DMA_MMAP_PGPROT
49 bool
50
51config ARCH_HAS_FORCE_DMA_UNENCRYPTED 57config ARCH_HAS_FORCE_DMA_UNENCRYPTED
52 bool 58 bool
53 59
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 29fd6590dc1e..545e3869b0e3 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -122,18 +122,6 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
122 dma_release_coherent_memory(mem); 122 dma_release_coherent_memory(mem);
123 return ret; 123 return ret;
124} 124}
125EXPORT_SYMBOL(dma_declare_coherent_memory);
126
127void dma_release_declared_memory(struct device *dev)
128{
129 struct dma_coherent_mem *mem = dev->dma_mem;
130
131 if (!mem)
132 return;
133 dma_release_coherent_memory(mem);
134 dev->dma_mem = NULL;
135}
136EXPORT_SYMBOL(dma_release_declared_memory);
137 125
138static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, 126static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
139 ssize_t size, dma_addr_t *dma_handle) 127 ssize_t size, dma_addr_t *dma_handle)
@@ -288,7 +276,6 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
288 276
289 return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); 277 return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
290} 278}
291EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
292 279
293int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, 280int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
294 size_t size, int *ret) 281 size_t size, int *ret)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b0038ca3aa92..64a3d294f4b4 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -136,17 +136,29 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
136 return ret; 136 return ret;
137} 137}
138 138
139/*
140 * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
141 * that the intention is to allow exporting memory allocated via the
142 * coherent DMA APIs through the dma_buf API, which only accepts a
143 * scattertable. This presents a couple of problems:
144 * 1. Not all memory allocated via the coherent DMA APIs is backed by
145 * a struct page
146 * 2. Passing coherent DMA memory into the streaming APIs is not allowed
147 * as we will try to flush the memory through a different alias to that
148 * actually being used (and the flushes are redundant.)
149 */
139int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, 150int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
140 void *cpu_addr, dma_addr_t dma_addr, size_t size, 151 void *cpu_addr, dma_addr_t dma_addr, size_t size,
141 unsigned long attrs) 152 unsigned long attrs)
142{ 153{
143 const struct dma_map_ops *ops = get_dma_ops(dev); 154 const struct dma_map_ops *ops = get_dma_ops(dev);
144 155
145 if (!dma_is_direct(ops) && ops->get_sgtable) 156 if (dma_is_direct(ops))
146 return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, 157 return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr,
147 attrs); 158 size, attrs);
148 return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, 159 if (!ops->get_sgtable)
149 attrs); 160 return -ENXIO;
161 return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
150} 162}
151EXPORT_SYMBOL(dma_get_sgtable_attrs); 163EXPORT_SYMBOL(dma_get_sgtable_attrs);
152 164
@@ -161,9 +173,11 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
161 (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && 173 (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
162 (attrs & DMA_ATTR_NON_CONSISTENT))) 174 (attrs & DMA_ATTR_NON_CONSISTENT)))
163 return prot; 175 return prot;
164 if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT)) 176#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
165 return arch_dma_mmap_pgprot(dev, prot, attrs); 177 if (attrs & DMA_ATTR_WRITE_COMBINE)
166 return pgprot_noncached(prot); 178 return pgprot_writecombine(prot);
179#endif
180 return pgprot_dmacoherent(prot);
167} 181}
168#endif /* CONFIG_MMU */ 182#endif /* CONFIG_MMU */
169 183
@@ -174,7 +188,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
174 void *cpu_addr, dma_addr_t dma_addr, size_t size, 188 void *cpu_addr, dma_addr_t dma_addr, size_t size,
175 unsigned long attrs) 189 unsigned long attrs)
176{ 190{
177#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP 191#ifdef CONFIG_MMU
178 unsigned long user_count = vma_pages(vma); 192 unsigned long user_count = vma_pages(vma);
179 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; 193 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
180 unsigned long off = vma->vm_pgoff; 194 unsigned long off = vma->vm_pgoff;
@@ -205,8 +219,29 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
205 user_count << PAGE_SHIFT, vma->vm_page_prot); 219 user_count << PAGE_SHIFT, vma->vm_page_prot);
206#else 220#else
207 return -ENXIO; 221 return -ENXIO;
208#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ 222#endif /* CONFIG_MMU */
223}
224
225/**
226 * dma_can_mmap - check if a given device supports dma_mmap_*
227 * @dev: device to check
228 *
229 * Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to
230 * map DMA allocations to userspace.
231 */
232bool dma_can_mmap(struct device *dev)
233{
234 const struct dma_map_ops *ops = get_dma_ops(dev);
235
236 if (dma_is_direct(ops)) {
237 return IS_ENABLED(CONFIG_MMU) &&
238 (dev_is_dma_coherent(dev) ||
239 IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN));
240 }
241
242 return ops->mmap != NULL;
209} 243}
244EXPORT_SYMBOL_GPL(dma_can_mmap);
210 245
211/** 246/**
212 * dma_mmap_attrs - map a coherent DMA allocation into user space 247 * dma_mmap_attrs - map a coherent DMA allocation into user space
@@ -227,31 +262,15 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
227{ 262{
228 const struct dma_map_ops *ops = get_dma_ops(dev); 263 const struct dma_map_ops *ops = get_dma_ops(dev);
229 264
230 if (!dma_is_direct(ops) && ops->mmap) 265 if (dma_is_direct(ops))
231 return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 266 return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size,
232 return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 267 attrs);
268 if (!ops->mmap)
269 return -ENXIO;
270 return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
233} 271}
234EXPORT_SYMBOL(dma_mmap_attrs); 272EXPORT_SYMBOL(dma_mmap_attrs);
235 273
236static u64 dma_default_get_required_mask(struct device *dev)
237{
238 u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
239 u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
240 u64 mask;
241
242 if (!high_totalram) {
243 /* convert to mask just covering totalram */
244 low_totalram = (1 << (fls(low_totalram) - 1));
245 low_totalram += low_totalram - 1;
246 mask = low_totalram;
247 } else {
248 high_totalram = (1 << (fls(high_totalram) - 1));
249 high_totalram += high_totalram - 1;
250 mask = (((u64)high_totalram) << 32) + 0xffffffff;
251 }
252 return mask;
253}
254
255u64 dma_get_required_mask(struct device *dev) 274u64 dma_get_required_mask(struct device *dev)
256{ 275{
257 const struct dma_map_ops *ops = get_dma_ops(dev); 276 const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -260,7 +279,16 @@ u64 dma_get_required_mask(struct device *dev)
260 return dma_direct_get_required_mask(dev); 279 return dma_direct_get_required_mask(dev);
261 if (ops->get_required_mask) 280 if (ops->get_required_mask)
262 return ops->get_required_mask(dev); 281 return ops->get_required_mask(dev);
263 return dma_default_get_required_mask(dev); 282
283 /*
284 * We require every DMA ops implementation to at least support a 32-bit
285 * DMA mask (and use bounce buffering if that isn't supported in
286 * hardware). As the direct mapping code has its own routine to
287 * actually report an optimal mask we default to 32-bit here as that
288 * is the right thing for most IOMMUs, and at least not actively
289 * harmful in general.
290 */
291 return DMA_BIT_MASK(32);
264} 292}
265EXPORT_SYMBOL_GPL(dma_get_required_mask); 293EXPORT_SYMBOL_GPL(dma_get_required_mask);
266 294
@@ -405,3 +433,14 @@ size_t dma_max_mapping_size(struct device *dev)
405 return size; 433 return size;
406} 434}
407EXPORT_SYMBOL_GPL(dma_max_mapping_size); 435EXPORT_SYMBOL_GPL(dma_max_mapping_size);
436
437unsigned long dma_get_merge_boundary(struct device *dev)
438{
439 const struct dma_map_ops *ops = get_dma_ops(dev);
440
441 if (!ops || !ops->get_merge_boundary)
442 return 0; /* can't merge */
443
444 return ops->get_merge_boundary(dev);
445}
446EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
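With dma_default_get_required_mask() gone, any dma_map_ops instance that lacks a ->get_required_mask callback now reports a plain 32-bit mask rather than a guess derived from max_pfn. Driver-side consumers look the same as before; a small, hypothetical sketch of the common pattern of only widening the mask when it actually buys something (foo_setup_dma_mask and the 40-bit capability are assumptions for illustration):

    #include <linux/dma-mapping.h>

    /* Hypothetical device able to address 40 bits of DMA. */
    static int foo_setup_dma_mask(struct device *dev)
    {
        if (dma_get_required_mask(dev) > DMA_BIT_MASK(32) &&
            !dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)))
            return 0;   /* wide mask accepted and worthwhile */

        return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
    }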
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index ffe78f0b2fe4..ca4e5d44b571 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -11,13 +11,21 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+struct page **dma_common_find_pages(void *cpu_addr)
+{
+        struct vm_struct *area = find_vm_area(cpu_addr);
+
+        if (!area || area->flags != VM_DMA_COHERENT)
+                return NULL;
+        return area->pages;
+}
+
 static struct vm_struct *__dma_common_pages_remap(struct page **pages,
-                size_t size, unsigned long vm_flags, pgprot_t prot,
-                const void *caller)
+                size_t size, pgprot_t prot, const void *caller)
 {
         struct vm_struct *area;
 
-        area = get_vm_area_caller(size, vm_flags, caller);
+        area = get_vm_area_caller(size, VM_DMA_COHERENT, caller);
         if (!area)
                 return NULL;
 
@@ -34,12 +42,11 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages,
  * Cannot be used in non-sleeping contexts
  */
 void *dma_common_pages_remap(struct page **pages, size_t size,
-                unsigned long vm_flags, pgprot_t prot,
-                const void *caller)
+                pgprot_t prot, const void *caller)
 {
         struct vm_struct *area;
 
-        area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+        area = __dma_common_pages_remap(pages, size, prot, caller);
         if (!area)
                 return NULL;
 
@@ -53,7 +60,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
  * Cannot be used in non-sleeping contexts
  */
 void *dma_common_contiguous_remap(struct page *page, size_t size,
-                unsigned long vm_flags,
                 pgprot_t prot, const void *caller)
 {
         int i;
@@ -67,7 +73,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
         for (i = 0; i < (size >> PAGE_SHIFT); i++)
                 pages[i] = nth_page(page, i);
 
-        area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+        area = __dma_common_pages_remap(pages, size, prot, caller);
 
         kfree(pages);
 
@@ -79,11 +85,11 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
 /*
  * Unmaps a range previously mapped by dma_common_*_remap
  */
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+void dma_common_free_remap(void *cpu_addr, size_t size)
 {
-        struct vm_struct *area = find_vm_area(cpu_addr);
+        struct page **pages = dma_common_find_pages(cpu_addr);
 
-        if (!area || (area->flags & vm_flags) != vm_flags) {
+        if (!pages) {
                 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
                 return;
         }
@@ -105,7 +111,16 @@ static int __init early_coherent_pool(char *p)
 }
 early_param("coherent_pool", early_coherent_pool);
 
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+static gfp_t dma_atomic_pool_gfp(void)
+{
+        if (IS_ENABLED(CONFIG_ZONE_DMA))
+                return GFP_DMA;
+        if (IS_ENABLED(CONFIG_ZONE_DMA32))
+                return GFP_DMA32;
+        return GFP_KERNEL;
+}
+
+static int __init dma_atomic_pool_init(void)
 {
         unsigned int pool_size_order = get_order(atomic_pool_size);
         unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
@@ -117,7 +132,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
                 page = dma_alloc_from_contiguous(NULL, nr_pages,
                                                  pool_size_order, false);
         else
-                page = alloc_pages(gfp, pool_size_order);
+                page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
         if (!page)
                 goto out;
 
@@ -127,8 +142,9 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
         if (!atomic_pool)
                 goto free_page;
 
-        addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
-                                           prot, __builtin_return_address(0));
+        addr = dma_common_contiguous_remap(page, atomic_pool_size,
+                                           pgprot_dmacoherent(PAGE_KERNEL),
+                                           __builtin_return_address(0));
         if (!addr)
                 goto destroy_genpool;
 
@@ -143,7 +159,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
         return 0;
 
 remove_mapping:
-        dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+        dma_common_free_remap(addr, atomic_pool_size);
 destroy_genpool:
         gen_pool_destroy(atomic_pool);
         atomic_pool = NULL;
@@ -155,6 +171,7 @@ out:
                         atomic_pool_size / 1024);
         return -ENOMEM;
 }
+postcore_initcall(dma_atomic_pool_init);
 
 bool dma_in_atomic_pool(void *start, size_t size)
 {
@@ -217,7 +234,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
         arch_dma_prep_coherent(page, size);
 
         /* create a coherent mapping */
-        ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
+        ret = dma_common_contiguous_remap(page, size,
                         dma_pgprot(dev, PAGE_KERNEL, attrs),
                         __builtin_return_address(0));
         if (!ret) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7ba11e12a11f..c1246d77cf75 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2993,7 +2993,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
         if (!area)
                 return -EINVAL;
 
-        if (!(area->flags & VM_USERMAP))
+        if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
                 return -EINVAL;
 
         if (kaddr + size > area->addr + get_vm_area_size(area))
@@ -3496,6 +3496,9 @@ static int s_show(struct seq_file *m, void *p)
         if (v->flags & VM_USERMAP)
                 seq_puts(m, " user");
 
+        if (v->flags & VM_DMA_COHERENT)
+                seq_puts(m, " dma-coherent");
+
         if (is_vmalloc_addr(v->pages))
                 seq_puts(m, " vpages");
 
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 11e653c8aa0e..91c6ad58729f 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -220,13 +220,12 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
 {
         if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
                 return false;
-        /* architecture supports dma_mmap_coherent()? */
-#if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA)
-        if (!substream->ops->mmap &&
-            substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
-                return false;
-#endif
-        return true;
+
+        if (substream->ops->mmap ||
+            substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV)
+                return true;
+
+        return dma_can_mmap(substream->dma_buffer.dev.dev);
 }
 
 static int constrain_mask_params(struct snd_pcm_substream *substream,
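With this change hw_support_mmap() asks the DMA layer directly instead of guessing from CONFIG_ARCH_NO_COHERENT_DMA_MMAP. Outside ALSA, the same pairing of dma_can_mmap() with dma_mmap_coherent() looks roughly like the sketch below (foo_mmap_buffer() is a hypothetical helper, not part of this series):

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/mm_types.h>

/* Hypothetical mmap path: bail out early when the DMA layer cannot map
 * the coherent allocation into user space on this platform. */
static int foo_mmap_buffer(struct device *dev, struct vm_area_struct *vma,
                           void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
        if (!dma_can_mmap(dev))
                return -ENXIO;

        return dma_mmap_coherent(dev, vma, cpu_addr, dma_addr, size);
}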