author     Muli Ben-Yehuda <mulix@mulix.org>    2006-01-11 16:44:42 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>    2006-01-11 22:04:55 -0500
commit     17a941d854a3f7b0bb916fdeee4c9ffdcc19d8d3
tree       b6b3b55318336adf769bf57141a01a9defbbb202 /arch/x86_64/kernel/pci-gart.c
parent     8a6fdd3e912e0ce6f723431d66baf704bf8a1d26
[PATCH] x86_64: Use function pointers to call DMA mapping functions
AK: I reworked Muli's original patch heavily, so any remaining bugs
are probably mine. The swiotlb fallback behaviour also changed; in
particular it no longer tries to use GFP_DMA. All DMA mapping
operations now go through the same core dma_alloc_coherent code with
proper fallbacks, plus various other changes and cleanups.
Known problem: iommu=force combined with swiotlb=force is broken and
needs more testing.
This patch cleans up x86_64's DMA mapping dispatching code. Right now
we have three possible IOMMU types: AGP GART, swiotlb and nommu, and
in the future we will also have Xen's x86_64 swiotlb and other HW
IOMMUs for x86_64. In order to support all of them cleanly, this
patch:
- introduces a struct dma_mapping_ops with function pointers for each
of the DMA mapping operations of gart (AMD HW IOMMU), swiotlb
(software IOMMU) and nommu (no IOMMU).
- gets rid of:
if (swiotlb)
return swiotlb_xxx();
- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set.
  This makes swiotlb faster by avoiding double copying in some cases.
Signed-off-by: Muli Ben-Yehuda <mulix@mulix.org>
Signed-off-by: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
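The core of the change is this dispatch pattern: each backend fills in a table
of function pointers, and a single global pointer selects the active backend at
boot. Below is a minimal, self-contained sketch of that pattern, not the
kernel's actual header code; the field list is trimmed to the hooks visible in
the gart_dma_ops initializer in the diff, and the generic wrapper at the end is
an assumed example of how a caller dispatches.

/*
 * Illustrative sketch only, not the kernel's definitions.  Field names
 * follow the gart_dma_ops initializer below; the wrapper is an assumed
 * example caller, not code from the patch.
 */
#include <stddef.h>

struct device;
struct scatterlist;
typedef unsigned long dma_addr_t;

struct dma_mapping_ops {
	dma_addr_t (*map_single)(struct device *dev, void *ptr,
				 size_t size, int direction);
	void	   (*unmap_single)(struct device *dev, dma_addr_t addr,
				   size_t size, int direction);
	int	   (*map_sg)(struct device *dev, struct scatterlist *sg,
			     int nents, int direction);
	void	   (*unmap_sg)(struct device *dev, struct scatterlist *sg,
			       int nents, int direction);
};

/* Set once at boot to the winning backend (gart, swiotlb or nommu). */
extern struct dma_mapping_ops *dma_ops;

/* Callers dispatch indirectly instead of "if (swiotlb) return swiotlb_xxx();". */
static inline dma_addr_t example_dma_map_single(struct device *dev, void *ptr,
						size_t size, int direction)
{
	return dma_ops->map_single(dev, ptr, size, direction);
}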
Diffstat (limited to 'arch/x86_64/kernel/pci-gart.c')
 -rw-r--r--  arch/x86_64/kernel/pci-gart.c  |  411
 1 file changed, 97 insertions(+), 314 deletions(-)
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 116e00f1bb7b..c37fc7726ba6 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -30,8 +30,8 @@
 #include <asm/proto.h>
 #include <asm/cacheflush.h>
 #include <asm/kdebug.h>
-
-dma_addr_t bad_dma_address;
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
 
 unsigned long iommu_bus_base;	/* GART remapping area (physical) */
 static unsigned long iommu_size;	/* size of remapping area bytes */
@@ -39,18 +39,6 @@ static unsigned long iommu_pages;	/* .. and in pages */
 
 u32 *iommu_gatt_base;		/* Remapping table */
 
-int no_iommu;
-static int no_agp;
-#ifdef CONFIG_IOMMU_DEBUG
-int panic_on_overflow = 1;
-int force_iommu = 1;
-#else
-int panic_on_overflow = 0;
-int force_iommu = 0;
-#endif
-int iommu_merge = 1;
-int iommu_sac_force = 0;
-
 /* If this is disabled the IOMMU will use an optimized flushing strategy
    of only flushing when an mapping is reused. With it true the GART is flushed
    for every mapping. Problem is that doing the lazy flush seems to trigger
@@ -58,10 +46,6 @@ int iommu_sac_force = 0;
    also seen with Qlogic at least). */
 int iommu_fullflush = 1;
 
-/* This tells the BIO block layer to assume merging. Default to off
-   because we cannot guarantee merging later. */
-int iommu_bio_merge = 0;
-
 #define MAX_NB 8
 
 /* Allocation bitmap for the remapping area */
@@ -102,16 +86,6 @@ AGPEXTERN __u32 *agp_gatt_table;
 
 static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
 static int need_flush;		/* global flush state. set for each gart wrap */
-static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
-			       size_t size, int dir, int do_panic);
-
-/* Dummy device used for NULL arguments (normally ISA). Better would
-   be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */
-static struct device fallback_dev = {
-	.bus_id = "fallback device",
-	.coherent_dma_mask = 0xffffffff,
-	.dma_mask = &fallback_dev.coherent_dma_mask,
-};
 
 static unsigned long alloc_iommu(int size)
 {
@@ -185,114 +159,7 @@ static void flush_gart(struct device *dev)
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
 
-/* Allocate DMA memory on node near device */
-noinline
-static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
-{
-	struct page *page;
-	int node;
-	if (dev->bus == &pci_bus_type)
-		node = pcibus_to_node(to_pci_dev(dev)->bus);
-	else
-		node = numa_node_id();
-	page = alloc_pages_node(node, gfp, order);
-	return page ? page_address(page) : NULL;
-}
-
-/*
- * Allocate memory for a coherent mapping.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   gfp_t gfp)
-{
-	void *memory;
-	unsigned long dma_mask = 0;
-	u64 bus;
 
-	if (!dev)
-		dev = &fallback_dev;
-	dma_mask = dev->coherent_dma_mask;
-	if (dma_mask == 0)
-		dma_mask = 0xffffffff;
-
-	/* Kludge to make it bug-to-bug compatible with i386. i386
-	   uses the normal dma_mask for alloc_coherent. */
-	dma_mask &= *dev->dma_mask;
-
-	/* Why <=? Even when the mask is smaller than 4GB it is often larger
-	   than 16MB and in this case we have a chance of finding fitting memory
-	   in the next higher zone first. If not retry with true GFP_DMA. -AK */
-	if (dma_mask <= 0xffffffff)
-		gfp |= GFP_DMA32;
-
- again:
-	memory = dma_alloc_pages(dev, gfp, get_order(size));
-	if (memory == NULL)
-		return NULL;
-
-	{
-		int high, mmu;
-		bus = virt_to_bus(memory);
-		high = (bus + size) >= dma_mask;
-		mmu = high;
-		if (force_iommu && !(gfp & GFP_DMA))
-			mmu = 1;
-		if (no_iommu || dma_mask < 0xffffffffUL) {
-			if (high) {
-				free_pages((unsigned long)memory,
-					   get_order(size));
-
-				if (swiotlb) {
-					return
-					swiotlb_alloc_coherent(dev, size,
-							       dma_handle,
-							       gfp);
-				}
-
-				if (!(gfp & GFP_DMA)) {
-					gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-					goto again;
-				}
-				return NULL;
-			}
-			mmu = 0;
-		}
-		memset(memory, 0, size);
-		if (!mmu) {
-			*dma_handle = virt_to_bus(memory);
-			return memory;
-		}
-	}
-
-	*dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0);
-	if (*dma_handle == bad_dma_address)
-		goto error;
-	flush_gart(dev);
-	return memory;
-
-error:
-	if (panic_on_overflow)
-		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size);
-	free_pages((unsigned long)memory, get_order(size));
-	return NULL;
-}
-
-/*
- * Unmap coherent memory.
- * The caller must ensure that the device has finished accessing the mapping.
- */
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t bus)
-{
-	if (swiotlb) {
-		swiotlb_free_coherent(dev, size, vaddr, bus);
-		return;
-	}
-
-	dma_unmap_single(dev, bus, size, 0);
-	free_pages((unsigned long)vaddr, get_order(size));
-}
 
 #ifdef CONFIG_IOMMU_LEAK
 
@@ -326,7 +193,7 @@ void dump_leak(void)
 #define CLEAR_LEAK(x)
 #endif
 
-static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
+static void iommu_full(struct device *dev, size_t size, int dir)
 {
 	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
@@ -342,11 +209,11 @@ static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
 		"PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
 		size, dev->bus_id);
 
-	if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) {
+	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
 		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
 			panic("PCI-DMA: Memory would be corrupted\n");
 		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Random memory would be DMAed\n");
+			panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
 	}
 
 #ifdef CONFIG_IOMMU_LEAK
@@ -385,8 +252,8 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
 /* Map a single continuous physical area into the IOMMU.
  * Caller needs to check if the iommu is needed and flush.
  */
-static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
-				size_t size, int dir, int do_panic)
+static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
+				size_t size, int dir)
 {
 	unsigned long npages = to_pages(phys_mem, size);
 	unsigned long iommu_page = alloc_iommu(npages);
@@ -396,7 +263,7 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
 		return phys_mem;
 	if (panic_on_overflow)
 		panic("dma_map_area overflow %lu bytes\n", size);
-	iommu_full(dev, size, dir, do_panic);
+	iommu_full(dev, size, dir);
 	return bad_dma_address;
 }
 
@@ -408,15 +275,21 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
 }
 
+static dma_addr_t gart_map_simple(struct device *dev, char *buf,
+				 size_t size, int dir)
+{
+	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+	flush_gart(dev);
+	return map;
+}
+
 /* Map a single area into the IOMMU */
-dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
+dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
 {
 	unsigned long phys_mem, bus;
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (swiotlb)
-		return swiotlb_map_single(dev,addr,size,dir);
 	if (!dev)
 		dev = &fallback_dev;
 
@@ -424,10 +297,24 @@ dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
 	if (!need_iommu(dev, phys_mem, size))
 		return phys_mem;
 
-	bus = dma_map_area(dev, phys_mem, size, dir, 1);
-	flush_gart(dev);
+	bus = gart_map_simple(dev, addr, size, dir);
 	return bus;
 }
+
+/*
+ * Wrapper for pci_unmap_single working with scatterlists.
+ */
+void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+{
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		struct scatterlist *s = &sg[i];
+		if (!s->dma_length || !s->length)
+			break;
+		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
+	}
+}
 
 /* Fallback for dma_map_sg in case of overflow */
 static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
@@ -443,10 +330,10 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 		struct scatterlist *s = &sg[i];
 		unsigned long addr = page_to_phys(s->page) + s->offset;
 		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir, 0);
+			addr = dma_map_area(dev, addr, s->length, dir);
 			if (addr == bad_dma_address) {
 				if (i > 0)
-					dma_unmap_sg(dev, sg, i, dir);
+					gart_unmap_sg(dev, sg, i, dir);
 				nents = 0;
 				sg[0].dma_length = 0;
 				break;
@@ -515,7 +402,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
  * DMA map all entries in a scatterlist.
  * Merge chunks that have page aligned sizes into a continuous mapping.
  */
-int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 {
 	int i;
 	int out;
@@ -527,8 +414,6 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 	if (nents == 0)
 		return 0;
 
-	if (swiotlb)
-		return swiotlb_map_sg(dev,sg,nents,dir);
 	if (!dev)
 		dev = &fallback_dev;
 
@@ -571,13 +456,13 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 
 error:
 	flush_gart(NULL);
-	dma_unmap_sg(dev, sg, nents, dir);
+	gart_unmap_sg(dev, sg, nents, dir);
 	/* When it was forced try again unforced */
 	if (force_iommu)
 		return dma_map_sg_nonforce(dev, sg, nents, dir);
 	if (panic_on_overflow)
 		panic("dma_map_sg: overflow on %lu pages\n", pages);
-	iommu_full(dev, pages << PAGE_SHIFT, dir, 0);
+	iommu_full(dev, pages << PAGE_SHIFT, dir);
 	for (i = 0; i < nents; i++)
 		sg[i].dma_address = bad_dma_address;
 	return 0;
@@ -586,18 +471,13 @@ error:
 /*
  * Free a DMA mapping.
  */
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
 		      size_t size, int direction)
 {
 	unsigned long iommu_page;
 	int npages;
 	int i;
 
-	if (swiotlb) {
-		swiotlb_unmap_single(dev,dma_addr,size,direction);
-		return;
-	}
-
 	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
 	    dma_addr >= iommu_bus_base + iommu_size)
 		return;
@@ -610,68 +490,7 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
 	free_iommu(iommu_page, npages);
 }
 
-/*
- * Wrapper for pci_unmap_single working with scatterlists.
- */
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
-{
-	int i;
-	if (swiotlb) {
-		swiotlb_unmap_sg(dev,sg,nents,dir);
-		return;
-	}
-	for (i = 0; i < nents; i++) {
-		struct scatterlist *s = &sg[i];
-		if (!s->dma_length || !s->length)
-			break;
-		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
-	}
-}
-
-int dma_supported(struct device *dev, u64 mask)
-{
-	/* Copied from i386. Doesn't make much sense, because it will
-	   only work for pci_alloc_coherent.
-	   The caller just has to use GFP_DMA in this case. */
-	if (mask < 0x00ffffff)
-		return 0;
-
-	/* Tell the device to use SAC when IOMMU force is on.
-	   This allows the driver to use cheaper accesses in some cases.
-
-	   Problem with this is that if we overflow the IOMMU area
-	   and return DAC as fallback address the device may not handle it correctly.
-
-	   As a special case some controllers have a 39bit address mode
-	   that is as efficient as 32bit (aic79xx). Don't force SAC for these.
-	   Assume all masks <= 40 bits are of this type. Normally this doesn't
-	   make any difference, but gives more gentle handling of IOMMU overflow. */
-	if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
-		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id, mask);
-		return 0;
-	}
-
-	return 1;
-}
-
-int dma_get_cache_alignment(void)
-{
-	return boot_cpu_data.x86_clflush_size;
-}
-
-EXPORT_SYMBOL(dma_unmap_sg);
-EXPORT_SYMBOL(dma_map_sg);
-EXPORT_SYMBOL(dma_map_single);
-EXPORT_SYMBOL(dma_unmap_single);
-EXPORT_SYMBOL(dma_supported);
-EXPORT_SYMBOL(no_iommu);
-EXPORT_SYMBOL(force_iommu);
-EXPORT_SYMBOL(bad_dma_address);
-EXPORT_SYMBOL(iommu_bio_merge);
-EXPORT_SYMBOL(iommu_sac_force);
-EXPORT_SYMBOL(dma_get_cache_alignment);
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
+static int no_agp;
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 {
@@ -778,6 +597,21 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 extern int agp_amd64_init(void);
 
+static struct dma_mapping_ops gart_dma_ops = {
+	.mapping_error = NULL,
+	.map_single = gart_map_single,
+	.map_simple = gart_map_simple,
+	.unmap_single = gart_unmap_single,
+	.sync_single_for_cpu = NULL,
+	.sync_single_for_device = NULL,
+	.sync_single_range_for_cpu = NULL,
+	.sync_single_range_for_device = NULL,
+	.sync_sg_for_cpu = NULL,
+	.sync_sg_for_device = NULL,
+	.map_sg = gart_map_sg,
+	.unmap_sg = gart_unmap_sg,
+};
+
 static int __init pci_iommu_init(void)
 {
 	struct agp_kern_info info;
@@ -799,16 +633,15 @@ static int __init pci_iommu_init(void)
 
 	if (swiotlb) {
 		no_iommu = 1;
-		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
 		return -1;
 	}
 
 	if (no_iommu ||
-	    (!force_iommu && (end_pfn-1) < 0xffffffff>>PAGE_SHIFT) ||
+	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
 	    !iommu_aperture ||
 	    (no_agp && init_k8_gatt(&info) < 0)) {
-		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
 		no_iommu = 1;
+		no_iommu_init();
 		return -1;
 	}
 
@@ -885,100 +718,50 @@ static int __init pci_iommu_init(void)
 
 	flush_gart(NULL);
 
+	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
+	dma_ops = &gart_dma_ops;
+
 	return 0;
 }
 
 /* Must execute after PCI subsystem */
 fs_initcall(pci_iommu_init);
 
-/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
-         [,forcesac][,fullflush][,nomerge][,biomerge]
-   size		set size of iommu (in bytes)
-   noagp	don't initialize the AGP driver and use full aperture.
-   off		don't use the IOMMU
-   leak		turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
-   memaper[=order] allocate an own aperture over RAM with size 32MB^order.
-   noforce	don't force IOMMU usage. Default.
-   force	Force IOMMU.
-   merge	Do lazy merging. This may improve performance on some block devices.
-		Implies force (experimental)
-   biomerge	Do merging at the BIO layer. This is more efficient than merge,
-		but should be only done with very big IOMMUs. Implies merge,force.
-   nomerge	Don't do SG merging.
-   forcesac	For SAC mode for masks <40bits (experimental)
-   fullflush	Flush IOMMU on each allocation (default)
-   nofullflush	Don't use IOMMU fullflush
-   allowed	overwrite iommu off workarounds for specific chipsets.
-   soft		Use software bounce buffering (default for Intel machines)
-   noaperture	Don't touch the aperture for AGP.
-*/
-__init int iommu_setup(char *p)
-{
-	int arg;
-
-	while (*p) {
-		if (!strncmp(p,"noagp",5))
-			no_agp = 1;
-		if (!strncmp(p,"off",3))
-			no_iommu = 1;
-		if (!strncmp(p,"force",5)) {
-			force_iommu = 1;
-			iommu_aperture_allowed = 1;
-		}
-		if (!strncmp(p,"allowed",7))
-			iommu_aperture_allowed = 1;
-		if (!strncmp(p,"noforce",7)) {
-			iommu_merge = 0;
-			force_iommu = 0;
-		}
-		if (!strncmp(p, "memaper", 7)) {
-			fallback_aper_force = 1;
-			p += 7;
-			if (*p == '=') {
-				++p;
-				if (get_option(&p, &arg))
-					fallback_aper_order = arg;
-			}
-		}
-		if (!strncmp(p, "biomerge",8)) {
-			iommu_bio_merge = 4096;
-			iommu_merge = 1;
-			force_iommu = 1;
-		}
-		if (!strncmp(p, "panic",5))
-			panic_on_overflow = 1;
-		if (!strncmp(p, "nopanic",7))
-			panic_on_overflow = 0;
-		if (!strncmp(p, "merge",5)) {
-			iommu_merge = 1;
-			force_iommu = 1;
-		}
-		if (!strncmp(p, "nomerge",7))
-			iommu_merge = 0;
-		if (!strncmp(p, "forcesac",8))
-			iommu_sac_force = 1;
-		if (!strncmp(p, "fullflush",8))
-			iommu_fullflush = 1;
-		if (!strncmp(p, "nofullflush",11))
-			iommu_fullflush = 0;
-		if (!strncmp(p, "soft",4))
-			swiotlb = 1;
-		if (!strncmp(p, "noaperture",10))
-			fix_aperture = 0;
+void gart_parse_options(char *p)
+{
+	int arg;
+
 #ifdef CONFIG_IOMMU_LEAK
 	if (!strncmp(p,"leak",4)) {
 		leak_trace = 1;
 		p += 4;
 		if (*p == '=') ++p;
 		if (isdigit(*p) && get_option(&p, &arg))
 			iommu_leak_pages = arg;
-	} else
+	}
 #endif
 	if (isdigit(*p) && get_option(&p, &arg))
 		iommu_size = arg;
-		p += strcspn(p, ",");
-		if (*p == ',')
-			++p;
-	}
-	return 1;
-}
+	if (!strncmp(p, "fullflush",8))
+		iommu_fullflush = 1;
+	if (!strncmp(p, "nofullflush",11))
+		iommu_fullflush = 0;
+	if (!strncmp(p,"noagp",5))
+		no_agp = 1;
+	if (!strncmp(p, "noaperture",10))
+		fix_aperture = 0;
+	/* duplicated from pci-dma.c */
+	if (!strncmp(p,"force",5))
+		iommu_aperture_allowed = 1;
+	if (!strncmp(p,"allowed",7))
+		iommu_aperture_allowed = 1;
+	if (!strncmp(p, "memaper", 7)) {
+		fallback_aper_force = 1;
+		p += 7;
+		if (*p == '=') {
+			++p;
+			if (get_option(&p, &arg))
+				fallback_aper_order = arg;
+		}
+	}
+}
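Seen from the init path above, the whole change reduces to choosing which table
ends up in dma_ops. A rough sketch of that boot-time decision follows; it is
illustrative only: this file's diff contains gart_dma_ops and the dma_ops
assignment, while swiotlb_dma_ops, nommu_dma_ops and the helper function are
assumed names used for the sketch.

/*
 * Illustrative sketch of the backend selection that replaces the old
 * scattered swiotlb checks: exactly one backend wins and publishes its
 * operations table.  swiotlb_dma_ops, nommu_dma_ops and pick_dma_backend
 * are assumed names; only gart_dma_ops appears in this file.
 */
struct dma_mapping_ops;			/* as introduced by this patch */

extern struct dma_mapping_ops *dma_ops;	/* global dispatch pointer */
extern struct dma_mapping_ops gart_dma_ops;
extern struct dma_mapping_ops swiotlb_dma_ops;	/* assumed name */
extern struct dma_mapping_ops nommu_dma_ops;	/* assumed name */

extern int swiotlb;			/* set by "iommu=soft" / setup code */
extern int no_iommu;

/* Rough shape of the decision made around pci_iommu_init()/no_iommu_init(). */
static void pick_dma_backend(int gart_usable)
{
	if (swiotlb)
		dma_ops = &swiotlb_dma_ops;	/* software bounce buffering */
	else if (no_iommu || !gart_usable)
		dma_ops = &nommu_dma_ops;	/* direct physical addressing */
	else
		dma_ops = &gart_dma_ops;	/* AMD K8 GART hardware IOMMU */
}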