diff options
author | Muli Ben-Yehuda <mulix@mulix.org> | 2006-01-11 16:44:42 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-11 22:04:55 -0500 |
commit | 17a941d854a3f7b0bb916fdeee4c9ffdcc19d8d3 (patch) | |
tree | b6b3b55318336adf769bf57141a01a9defbbb202 /arch/x86_64/kernel/pci-dma.c | |
parent | 8a6fdd3e912e0ce6f723431d66baf704bf8a1d26 (diff) |
[PATCH] x86_64: Use function pointers to call DMA mapping functions
AK: I hacked Muli's original patch a lot and there were a lot
of changes - all bugs are probably to blame on me now.
There were also some changes in the fall back behaviour
for swiotlb - in particular it doesn't try to use GFP_DMA
now anymore. Also all DMA mapping operations use the
same core dma_alloc_coherent code with proper fallbacks now.
And various other changes and cleanups.
Known problems: iommu=force swiotlb=force together breaks
needs more testing.
This patch cleans up x86_64's DMA mapping dispatching code. Right now
we have three possible IOMMU types: AGP GART, swiotlb and nommu, and
in the future we will also have Xen's x86_64 swiotlb and other HW
IOMMUs for x86_64. In order to support all of them cleanly, this
patch:
- introduces a struct dma_mapping_ops with function pointers for each
of the DMA mapping operations of gart (AMD HW IOMMU), swiotlb
(software IOMMU) and nommu (no IOMMU).
- gets rid of:
if (swiotlb)
return swiotlb_xxx();
- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set
This makes swiotlb faster by avoiding double copying in some cases.
Signed-Off-By: Muli Ben-Yehuda <mulix@mulix.org>
Signed-Off-By: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64/kernel/pci-dma.c')
-rw-r--r-- | arch/x86_64/kernel/pci-dma.c | 286 |
1 files changed, 246 insertions, 40 deletions
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index cab471cf3edb..2f5d8328e2b9 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -8,53 +8,259 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <asm/io.h> | 10 | #include <asm/io.h> |
11 | #include <asm/proto.h> | ||
11 | 12 | ||
12 | /* Map a set of buffers described by scatterlist in streaming | 13 | int iommu_merge __read_mostly = 0; |
13 | * mode for DMA. This is the scatter-gather version of the | 14 | EXPORT_SYMBOL(iommu_merge); |
14 | * above pci_map_single interface. Here the scatter gather list | 15 | |
15 | * elements are each tagged with the appropriate dma address | 16 | dma_addr_t bad_dma_address __read_mostly; |
16 | * and length. They are obtained via sg_dma_{address,length}(SG). | 17 | EXPORT_SYMBOL(bad_dma_address); |
17 | * | 18 | |
18 | * NOTE: An implementation may be able to use a smaller number of | 19 | /* This tells the BIO block layer to assume merging. Default to off |
19 | * DMA address/length pairs than there are SG table elements. | 20 | because we cannot guarantee merging later. */ |
20 | * (for example via virtual mapping capabilities) | 21 | int iommu_bio_merge __read_mostly = 0; |
21 | * The routine returns the number of addr/length pairs actually | 22 | EXPORT_SYMBOL(iommu_bio_merge); |
22 | * used, at most nents. | 23 | |
23 | * | 24 | int iommu_sac_force __read_mostly = 0; |
24 | * Device ownership issues as mentioned above for pci_map_single are | 25 | EXPORT_SYMBOL(iommu_sac_force); |
25 | * the same here. | 26 | |
27 | int no_iommu __read_mostly; | ||
28 | #ifdef CONFIG_IOMMU_DEBUG | ||
29 | int panic_on_overflow __read_mostly = 1; | ||
30 | int force_iommu __read_mostly = 1; | ||
31 | #else | ||
32 | int panic_on_overflow __read_mostly = 0; | ||
33 | int force_iommu __read_mostly= 0; | ||
34 | #endif | ||
35 | |||
36 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
37 | be probably a smaller DMA mask, but this is bug-to-bug compatible | ||
38 | to i386. */ | ||
39 | struct device fallback_dev = { | ||
40 | .bus_id = "fallback device", | ||
41 | .coherent_dma_mask = 0xffffffff, | ||
42 | .dma_mask = &fallback_dev.coherent_dma_mask, | ||
43 | }; | ||
44 | |||
45 | /* Allocate DMA memory on node near device */ | ||
46 | noinline static void * | ||
47 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
48 | { | ||
49 | struct page *page; | ||
50 | int node; | ||
51 | if (dev->bus == &pci_bus_type) | ||
52 | node = pcibus_to_node(to_pci_dev(dev)->bus); | ||
53 | else | ||
54 | node = numa_node_id(); | ||
55 | page = alloc_pages_node(node, gfp, order); | ||
56 | return page ? page_address(page) : NULL; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Allocate memory for a coherent mapping. | ||
26 | */ | 61 | */ |
27 | int dma_map_sg(struct device *hwdev, struct scatterlist *sg, | 62 | void * |
28 | int nents, int direction) | 63 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
64 | gfp_t gfp) | ||
29 | { | 65 | { |
30 | int i; | 66 | void *memory; |
31 | 67 | unsigned long dma_mask = 0; | |
32 | BUG_ON(direction == DMA_NONE); | 68 | u64 bus; |
33 | for (i = 0; i < nents; i++ ) { | 69 | |
34 | struct scatterlist *s = &sg[i]; | 70 | if (!dev) |
35 | BUG_ON(!s->page); | 71 | dev = &fallback_dev; |
36 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | 72 | dma_mask = dev->coherent_dma_mask; |
37 | s->dma_length = s->length; | 73 | if (dma_mask == 0) |
74 | dma_mask = 0xffffffff; | ||
75 | |||
76 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
77 | uses the normal dma_mask for alloc_coherent. */ | ||
78 | dma_mask &= *dev->dma_mask; | ||
79 | |||
80 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
81 | larger than 16MB and in this case we have a chance of | ||
82 | finding fitting memory in the next higher zone first. If | ||
83 | not retry with true GFP_DMA. -AK */ | ||
84 | if (dma_mask <= 0xffffffff) | ||
85 | gfp |= GFP_DMA32; | ||
86 | |||
87 | again: | ||
88 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
89 | if (memory == NULL) | ||
90 | return NULL; | ||
91 | |||
92 | { | ||
93 | int high, mmu; | ||
94 | bus = virt_to_bus(memory); | ||
95 | high = (bus + size) >= dma_mask; | ||
96 | mmu = high; | ||
97 | if (force_iommu && !(gfp & GFP_DMA)) | ||
98 | mmu = 1; | ||
99 | else if (high) { | ||
100 | free_pages((unsigned long)memory, | ||
101 | get_order(size)); | ||
102 | |||
103 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
104 | needed. It's better to use remapping first. */ | ||
105 | if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { | ||
106 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
107 | goto again; | ||
108 | } | ||
109 | |||
110 | if (dma_ops->alloc_coherent) | ||
111 | return dma_ops->alloc_coherent(dev, size, | ||
112 | dma_handle, gfp); | ||
113 | return NULL; | ||
114 | } | ||
115 | |||
116 | memset(memory, 0, size); | ||
117 | if (!mmu) { | ||
118 | *dma_handle = virt_to_bus(memory); | ||
119 | return memory; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | if (dma_ops->alloc_coherent) { | ||
124 | free_pages((unsigned long)memory, get_order(size)); | ||
125 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
126 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
127 | } | ||
128 | |||
129 | if (dma_ops->map_simple) { | ||
130 | *dma_handle = dma_ops->map_simple(dev, memory, | ||
131 | size, | ||
132 | PCI_DMA_BIDIRECTIONAL); | ||
133 | if (*dma_handle != bad_dma_address) | ||
134 | return memory; | ||
38 | } | 135 | } |
39 | return nents; | ||
40 | } | ||
41 | 136 | ||
42 | EXPORT_SYMBOL(dma_map_sg); | 137 | if (panic_on_overflow) |
138 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); | ||
139 | free_pages((unsigned long)memory, get_order(size)); | ||
140 | return NULL; | ||
141 | } | ||
142 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
43 | 143 | ||
44 | /* Unmap a set of streaming mode DMA translations. | 144 | /* |
45 | * Again, cpu read rules concerning calls here are the same as for | 145 | * Unmap coherent memory. |
46 | * pci_unmap_single() above. | 146 | * The caller must ensure that the device has finished accessing the mapping. |
47 | */ | 147 | */ |
48 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, | 148 | void dma_free_coherent(struct device *dev, size_t size, |
49 | int nents, int dir) | 149 | void *vaddr, dma_addr_t bus) |
150 | { | ||
151 | if (dma_ops->unmap_single) | ||
152 | dma_ops->unmap_single(dev, bus, size, 0); | ||
153 | free_pages((unsigned long)vaddr, get_order(size)); | ||
154 | } | ||
155 | EXPORT_SYMBOL(dma_free_coherent); | ||
156 | |||
157 | int dma_supported(struct device *dev, u64 mask) | ||
158 | { | ||
159 | if (dma_ops->dma_supported) | ||
160 | return dma_ops->dma_supported(dev, mask); | ||
161 | |||
162 | /* Copied from i386. Doesn't make much sense, because it will | ||
163 | only work for pci_alloc_coherent. | ||
164 | The caller just has to use GFP_DMA in this case. */ | ||
165 | if (mask < 0x00ffffff) | ||
166 | return 0; | ||
167 | |||
168 | /* Tell the device to use SAC when IOMMU force is on. This | ||
169 | allows the driver to use cheaper accesses in some cases. | ||
170 | |||
171 | Problem with this is that if we overflow the IOMMU area and | ||
172 | return DAC as fallback address the device may not handle it | ||
173 | correctly. | ||
174 | |||
175 | As a special case some controllers have a 39bit address | ||
176 | mode that is as efficient as 32bit (aic79xx). Don't force | ||
177 | SAC for these. Assume all masks <= 40 bits are of this | ||
178 | type. Normally this doesn't make any difference, but gives | ||
179 | more gentle handling of IOMMU overflow. */ | ||
180 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
181 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | return 1; | ||
186 | } | ||
187 | EXPORT_SYMBOL(dma_supported); | ||
188 | |||
189 | int dma_set_mask(struct device *dev, u64 mask) | ||
50 | { | 190 | { |
51 | int i; | 191 | if (!dev->dma_mask || !dma_supported(dev, mask)) |
52 | for (i = 0; i < nents; i++) { | 192 | return -EIO; |
53 | struct scatterlist *s = &sg[i]; | 193 | *dev->dma_mask = mask; |
54 | BUG_ON(s->page == NULL); | 194 | return 0; |
55 | BUG_ON(s->dma_address == 0); | ||
56 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
57 | } | ||
58 | } | 195 | } |
196 | EXPORT_SYMBOL(dma_set_mask); | ||
197 | |||
198 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | ||
199 | [,forcesac][,fullflush][,nomerge][,biomerge] | ||
200 | size set size of iommu (in bytes) | ||
201 | noagp don't initialize the AGP driver and use full aperture. | ||
202 | off don't use the IOMMU | ||
203 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
204 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
205 | noforce don't force IOMMU usage. Default. | ||
206 | force Force IOMMU. | ||
207 | merge Do lazy merging. This may improve performance on some block devices. | ||
208 | Implies force (experimental) | ||
209 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
210 | but should be only done with very big IOMMUs. Implies merge,force. | ||
211 | nomerge Don't do SG merging. | ||
212 | forcesac For SAC mode for masks <40bits (experimental) | ||
213 | fullflush Flush IOMMU on each allocation (default) | ||
214 | nofullflush Don't use IOMMU fullflush | ||
215 | allowed overwrite iommu off workarounds for specific chipsets. | ||
216 | soft Use software bounce buffering (default for Intel machines) | ||
217 | noaperture Don't touch the aperture for AGP. | ||
218 | */ | ||
219 | __init int iommu_setup(char *p) | ||
220 | { | ||
221 | iommu_merge = 1; | ||
59 | 222 | ||
60 | EXPORT_SYMBOL(dma_unmap_sg); | 223 | while (*p) { |
224 | if (!strncmp(p,"off",3)) | ||
225 | no_iommu = 1; | ||
226 | /* gart_parse_options has more force support */ | ||
227 | if (!strncmp(p,"force",5)) | ||
228 | force_iommu = 1; | ||
229 | if (!strncmp(p,"noforce",7)) { | ||
230 | iommu_merge = 0; | ||
231 | force_iommu = 0; | ||
232 | } | ||
233 | |||
234 | if (!strncmp(p, "biomerge",8)) { | ||
235 | iommu_bio_merge = 4096; | ||
236 | iommu_merge = 1; | ||
237 | force_iommu = 1; | ||
238 | } | ||
239 | if (!strncmp(p, "panic",5)) | ||
240 | panic_on_overflow = 1; | ||
241 | if (!strncmp(p, "nopanic",7)) | ||
242 | panic_on_overflow = 0; | ||
243 | if (!strncmp(p, "merge",5)) { | ||
244 | iommu_merge = 1; | ||
245 | force_iommu = 1; | ||
246 | } | ||
247 | if (!strncmp(p, "nomerge",7)) | ||
248 | iommu_merge = 0; | ||
249 | if (!strncmp(p, "forcesac",8)) | ||
250 | iommu_sac_force = 1; | ||
251 | |||
252 | #ifdef CONFIG_SWIOTLB | ||
253 | if (!strncmp(p, "soft",4)) | ||
254 | swiotlb = 1; | ||
255 | #endif | ||
256 | |||
257 | #ifdef CONFIG_GART_IOMMU | ||
258 | gart_parse_options(p); | ||
259 | #endif | ||
260 | |||
261 | p += strcspn(p, ","); | ||
262 | if (*p == ',') | ||
263 | ++p; | ||
264 | } | ||
265 | return 1; | ||
266 | } | ||