aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel/pci-dma.c
diff options
context:
space:
mode:
author: Muli Ben-Yehuda <mulix@mulix.org> 2006-01-11 16:44:42 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-11 22:04:55 -0500
commit: 17a941d854a3f7b0bb916fdeee4c9ffdcc19d8d3 (patch)
tree: b6b3b55318336adf769bf57141a01a9defbbb202 /arch/x86_64/kernel/pci-dma.c
parent: 8a6fdd3e912e0ce6f723431d66baf704bf8a1d26 (diff)
[PATCH] x86_64: Use function pointers to call DMA mapping functions
AK: I hacked Muli's original patch a lot and there were a lot of changes - all bugs are probably mine to blame now. There were also some changes in the fallback behaviour for swiotlb - in particular, it no longer tries to use GFP_DMA. Also, all DMA mapping operations now use the same core dma_alloc_coherent code with proper fallbacks. And various other changes and cleanups.

Known problems: iommu=force and swiotlb=force used together break; this needs more testing.

This patch cleans up x86_64's DMA mapping dispatching code. Right now we have three possible IOMMU types: AGP GART, swiotlb and nommu, and in the future we will also have Xen's x86_64 swiotlb and other HW IOMMUs for x86_64. In order to support all of them cleanly, this patch:

- introduces a struct dma_mapping_ops with function pointers for each of the DMA mapping operations of gart (AMD HW IOMMU), swiotlb (software IOMMU) and nommu (no IOMMU).
- gets rid of: if (swiotlb) return swiotlb_xxx();
- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set.

This makes swiotlb faster by avoiding double copying in some cases.

Signed-Off-By: Muli Ben-Yehuda <mulix@mulix.org>
Signed-Off-By: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64/kernel/pci-dma.c')
-rw-r--r-- arch/x86_64/kernel/pci-dma.c 286
1 files changed, 246 insertions, 40 deletions
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index cab471cf3edb..2f5d8328e2b9 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -8,53 +8,259 @@
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h>
11 12
12/* Map a set of buffers described by scatterlist in streaming 13int iommu_merge __read_mostly = 0;
13 * mode for DMA. This is the scatter-gather version of the 14EXPORT_SYMBOL(iommu_merge);
14 * above pci_map_single interface. Here the scatter gather list 15
15 * elements are each tagged with the appropriate dma address 16dma_addr_t bad_dma_address __read_mostly;
16 * and length. They are obtained via sg_dma_{address,length}(SG). 17EXPORT_SYMBOL(bad_dma_address);
17 * 18
18 * NOTE: An implementation may be able to use a smaller number of 19/* This tells the BIO block layer to assume merging. Default to off
19 * DMA address/length pairs than there are SG table elements. 20 because we cannot guarantee merging later. */
20 * (for example via virtual mapping capabilities) 21int iommu_bio_merge __read_mostly = 0;
21 * The routine returns the number of addr/length pairs actually 22EXPORT_SYMBOL(iommu_bio_merge);
22 * used, at most nents. 23
23 * 24int iommu_sac_force __read_mostly = 0;
24 * Device ownership issues as mentioned above for pci_map_single are 25EXPORT_SYMBOL(iommu_sac_force);
25 * the same here. 26
27int no_iommu __read_mostly;
28#ifdef CONFIG_IOMMU_DEBUG
29int panic_on_overflow __read_mostly = 1;
30int force_iommu __read_mostly = 1;
31#else
32int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0;
34#endif
35
36/* Dummy device used for NULL arguments (normally ISA). Better would
37 be probably a smaller DMA mask, but this is bug-to-bug compatible
38 to i386. */
39struct device fallback_dev = {
40 .bus_id = "fallback device",
41 .coherent_dma_mask = 0xffffffff,
42 .dma_mask = &fallback_dev.coherent_dma_mask,
43};
44
45/* Allocate DMA memory on node near device */
46noinline static void *
47dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
48{
49 struct page *page;
50 int node;
51 if (dev->bus == &pci_bus_type)
52 node = pcibus_to_node(to_pci_dev(dev)->bus);
53 else
54 node = numa_node_id();
55 page = alloc_pages_node(node, gfp, order);
56 return page ? page_address(page) : NULL;
57}
58
59/*
60 * Allocate memory for a coherent mapping.
26 */ 61 */
27int dma_map_sg(struct device *hwdev, struct scatterlist *sg, 62void *
28 int nents, int direction) 63dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
64 gfp_t gfp)
29{ 65{
30 int i; 66 void *memory;
31 67 unsigned long dma_mask = 0;
32 BUG_ON(direction == DMA_NONE); 68 u64 bus;
33 for (i = 0; i < nents; i++ ) { 69
34 struct scatterlist *s = &sg[i]; 70 if (!dev)
35 BUG_ON(!s->page); 71 dev = &fallback_dev;
36 s->dma_address = virt_to_bus(page_address(s->page) +s->offset); 72 dma_mask = dev->coherent_dma_mask;
37 s->dma_length = s->length; 73 if (dma_mask == 0)
74 dma_mask = 0xffffffff;
75
76 /* Kludge to make it bug-to-bug compatible with i386. i386
77 uses the normal dma_mask for alloc_coherent. */
78 dma_mask &= *dev->dma_mask;
79
80 /* Why <=? Even when the mask is smaller than 4GB it is often
81 larger than 16MB and in this case we have a chance of
82 finding fitting memory in the next higher zone first. If
83 not retry with true GFP_DMA. -AK */
84 if (dma_mask <= 0xffffffff)
85 gfp |= GFP_DMA32;
86
87 again:
88 memory = dma_alloc_pages(dev, gfp, get_order(size));
89 if (memory == NULL)
90 return NULL;
91
92 {
93 int high, mmu;
94 bus = virt_to_bus(memory);
95 high = (bus + size) >= dma_mask;
96 mmu = high;
97 if (force_iommu && !(gfp & GFP_DMA))
98 mmu = 1;
99 else if (high) {
100 free_pages((unsigned long)memory,
101 get_order(size));
102
103 /* Don't use the 16MB ZONE_DMA unless absolutely
104 needed. It's better to use remapping first. */
105 if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) {
106 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
107 goto again;
108 }
109
110 if (dma_ops->alloc_coherent)
111 return dma_ops->alloc_coherent(dev, size,
112 dma_handle, gfp);
113 return NULL;
114 }
115
116 memset(memory, 0, size);
117 if (!mmu) {
118 *dma_handle = virt_to_bus(memory);
119 return memory;
120 }
121 }
122
123 if (dma_ops->alloc_coherent) {
124 free_pages((unsigned long)memory, get_order(size));
125 gfp &= ~(GFP_DMA|GFP_DMA32);
126 return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
127 }
128
129 if (dma_ops->map_simple) {
130 *dma_handle = dma_ops->map_simple(dev, memory,
131 size,
132 PCI_DMA_BIDIRECTIONAL);
133 if (*dma_handle != bad_dma_address)
134 return memory;
38 } 135 }
39 return nents;
40}
41 136
42EXPORT_SYMBOL(dma_map_sg); 137 if (panic_on_overflow)
138 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size);
139 free_pages((unsigned long)memory, get_order(size));
140 return NULL;
141}
142EXPORT_SYMBOL(dma_alloc_coherent);
43 143
44/* Unmap a set of streaming mode DMA translations. 144/*
45 * Again, cpu read rules concerning calls here are the same as for 145 * Unmap coherent memory.
46 * pci_unmap_single() above. 146 * The caller must ensure that the device has finished accessing the mapping.
47 */ 147 */
48void dma_unmap_sg(struct device *dev, struct scatterlist *sg, 148void dma_free_coherent(struct device *dev, size_t size,
49 int nents, int dir) 149 void *vaddr, dma_addr_t bus)
150{
151 if (dma_ops->unmap_single)
152 dma_ops->unmap_single(dev, bus, size, 0);
153 free_pages((unsigned long)vaddr, get_order(size));
154}
155EXPORT_SYMBOL(dma_free_coherent);
156
157int dma_supported(struct device *dev, u64 mask)
158{
159 if (dma_ops->dma_supported)
160 return dma_ops->dma_supported(dev, mask);
161
162 /* Copied from i386. Doesn't make much sense, because it will
163 only work for pci_alloc_coherent.
164 The caller just has to use GFP_DMA in this case. */
165 if (mask < 0x00ffffff)
166 return 0;
167
168 /* Tell the device to use SAC when IOMMU force is on. This
169 allows the driver to use cheaper accesses in some cases.
170
171 Problem with this is that if we overflow the IOMMU area and
172 return DAC as fallback address the device may not handle it
173 correctly.
174
175 As a special case some controllers have a 39bit address
176 mode that is as efficient as 32bit (aic79xx). Don't force
177 SAC for these. Assume all masks <= 40 bits are of this
178 type. Normally this doesn't make any difference, but gives
179 more gentle handling of IOMMU overflow. */
180 if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
181 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
182 return 0;
183 }
184
185 return 1;
186}
187EXPORT_SYMBOL(dma_supported);
188
189int dma_set_mask(struct device *dev, u64 mask)
50{ 190{
51 int i; 191 if (!dev->dma_mask || !dma_supported(dev, mask))
52 for (i = 0; i < nents; i++) { 192 return -EIO;
53 struct scatterlist *s = &sg[i]; 193 *dev->dma_mask = mask;
54 BUG_ON(s->page == NULL); 194 return 0;
55 BUG_ON(s->dma_address == 0);
56 dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
57 }
58} 195}
196EXPORT_SYMBOL(dma_set_mask);
197
198/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
199 [,forcesac][,fullflush][,nomerge][,biomerge]
200 size set size of iommu (in bytes)
201 noagp don't initialize the AGP driver and use full aperture.
202 off don't use the IOMMU
203 leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
204 memaper[=order] allocate an own aperture over RAM with size 32MB^order.
205 noforce don't force IOMMU usage. Default.
206 force Force IOMMU.
207 merge Do lazy merging. This may improve performance on some block devices.
208 Implies force (experimental)
209 biomerge Do merging at the BIO layer. This is more efficient than merge,
210 but should be only done with very big IOMMUs. Implies merge,force.
211 nomerge Don't do SG merging.
212 forcesac For SAC mode for masks <40bits (experimental)
213 fullflush Flush IOMMU on each allocation (default)
214 nofullflush Don't use IOMMU fullflush
215 allowed overwrite iommu off workarounds for specific chipsets.
216 soft Use software bounce buffering (default for Intel machines)
217 noaperture Don't touch the aperture for AGP.
218*/
219__init int iommu_setup(char *p)
220{
221 iommu_merge = 1;
59 222
60EXPORT_SYMBOL(dma_unmap_sg); 223 while (*p) {
224 if (!strncmp(p,"off",3))
225 no_iommu = 1;
226 /* gart_parse_options has more force support */
227 if (!strncmp(p,"force",5))
228 force_iommu = 1;
229 if (!strncmp(p,"noforce",7)) {
230 iommu_merge = 0;
231 force_iommu = 0;
232 }
233
234 if (!strncmp(p, "biomerge",8)) {
235 iommu_bio_merge = 4096;
236 iommu_merge = 1;
237 force_iommu = 1;
238 }
239 if (!strncmp(p, "panic",5))
240 panic_on_overflow = 1;
241 if (!strncmp(p, "nopanic",7))
242 panic_on_overflow = 0;
243 if (!strncmp(p, "merge",5)) {
244 iommu_merge = 1;
245 force_iommu = 1;
246 }
247 if (!strncmp(p, "nomerge",7))
248 iommu_merge = 0;
249 if (!strncmp(p, "forcesac",8))
250 iommu_sac_force = 1;
251
252#ifdef CONFIG_SWIOTLB
253 if (!strncmp(p, "soft",4))
254 swiotlb = 1;
255#endif
256
257#ifdef CONFIG_GART_IOMMU
258 gart_parse_options(p);
259#endif
260
261 p += strcspn(p, ",");
262 if (*p == ',')
263 ++p;
264 }
265 return 1;
266}