author     Muli Ben-Yehuda <mulix@mulix.org>   2006-01-11 16:44:42 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-01-11 22:04:55 -0500
commit     17a941d854a3f7b0bb916fdeee4c9ffdcc19d8d3 (patch)
tree       b6b3b55318336adf769bf57141a01a9defbbb202 /arch
parent     8a6fdd3e912e0ce6f723431d66baf704bf8a1d26 (diff)
[PATCH] x86_64: Use function pointers to call DMA mapping functions
AK: I hacked Muli's original patch a lot and there were a lot of changes -
all bugs are probably to blame on me now. There were also some changes in
the fall back behaviour for swiotlb - in particular it doesn't try to use
GFP_DMA now anymore. Also all DMA mapping operations use the same core
dma_alloc_coherent code with proper fallbacks now. And various other
changes and cleanups.

Known problems: iommu=force swiotlb=force together breaks; needs more
testing.

This patch cleans up x86_64's DMA mapping dispatching code. Right now we
have three possible IOMMU types: AGP GART, swiotlb and nommu, and in the
future we will also have Xen's x86_64 swiotlb and other HW IOMMUs for
x86_64. In order to support all of them cleanly, this patch:

- introduces a struct dma_mapping_ops with function pointers for each of
  the DMA mapping operations of gart (AMD HW IOMMU), swiotlb (software
  IOMMU) and nommu (no IOMMU).
- gets rid of: if (swiotlb) return swiotlb_xxx();
- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set

This makes swiotlb faster by avoiding double copying in some cases.

Signed-Off-By: Muli Ben-Yehuda <mulix@mulix.org>
Signed-Off-By: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--   arch/x86_64/Kconfig              |  18
-rw-r--r--   arch/x86_64/kernel/Makefile      |   4
-rw-r--r--   arch/x86_64/kernel/pci-dma.c     | 286
-rw-r--r--   arch/x86_64/kernel/pci-gart.c    | 411
-rw-r--r--   arch/x86_64/kernel/pci-nommu.c   | 144
-rw-r--r--   arch/x86_64/kernel/pci-swiotlb.c |  42
-rw-r--r--   arch/x86_64/kernel/setup.c       |  10
-rw-r--r--   arch/x86_64/mm/init.c            |  13
8 files changed, 477 insertions, 451 deletions
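
To make the struct dma_mapping_ops dispatch described in the commit message
concrete before reading the patch itself, here is a minimal, self-contained
C sketch of the pattern (userspace, illustrative only). The names
dma_mapping_ops, dma_ops, map_single/unmap_single and the nommu backend
mirror identifiers visible in the diff below; the toy device argument, the
fake address handling and main() are invented for illustration.

/* Illustrative only: a toy model of the dma_mapping_ops dispatch, not kernel code. */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef unsigned long dma_addr_t;          /* stand-in for the kernel type */

struct dma_mapping_ops {
	dma_addr_t (*map_single)(void *dev, void *ptr, size_t size, int dir);
	void       (*unmap_single)(void *dev, dma_addr_t addr, size_t size, int dir);
};

/* "nommu" backend: bus addresses are just the CPU addresses */
static dma_addr_t nommu_map_single(void *dev, void *ptr, size_t size, int dir)
{
	(void)dev; (void)size; (void)dir;
	return (dma_addr_t)(uintptr_t)ptr;
}

static void nommu_unmap_single(void *dev, dma_addr_t addr, size_t size, int dir)
{
	(void)dev; (void)addr; (void)size; (void)dir;   /* nothing to undo */
}

static struct dma_mapping_ops nommu_ops = {
	.map_single   = nommu_map_single,
	.unmap_single = nommu_unmap_single,
};

/* One global pointer selects the active backend, as the patch does with dma_ops. */
static struct dma_mapping_ops *dma_ops;

/* Generic entry point: no per-backend if/else, just an indirect call. */
static dma_addr_t dma_map_single(void *dev, void *ptr, size_t size, int dir)
{
	return dma_ops->map_single(dev, ptr, size, dir);
}

int main(void)
{
	char buf[64];

	dma_ops = &nommu_ops;               /* chosen once at init time */
	printf("mapped at %#lx\n", dma_map_single(NULL, buf, sizeof(buf), 0));
	return 0;
}

The point of the indirection is exactly what the message states: generic
entry points such as dma_map_single() no longer need per-backend
if (swiotlb) ... branches, and a new IOMMU backend only has to fill in
another ops table.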
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 150e1ac0bfed..ee053e32a721 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -351,32 +351,24 @@ config HPET_EMULATE_RTC
351 depends on HPET_TIMER && RTC=y 351 depends on HPET_TIMER && RTC=y
352 352
353config GART_IOMMU 353config GART_IOMMU
354 bool "IOMMU support" 354 bool "K8 GART IOMMU support"
355 default y 355 default y
356 select SWIOTLB
356 depends on PCI 357 depends on PCI
357 help 358 help
358 Support the IOMMU. Needed to run systems with more than 3GB of memory 359 Support the IOMMU. Needed to run systems with more than 3GB of memory
359 properly with 32-bit PCI devices that do not support DAC (Double Address 360 properly with 32-bit PCI devices that do not support DAC (Double Address
360 Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. 361 Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter.
361 Normally the kernel will take the right choice by itself. 362 Normally the kernel will take the right choice by itself.
362 This option includes a driver for the AMD Opteron/Athlon64 IOMMU 363 This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU
363 and a software emulation used on some other systems. 364 and a software emulation used on other systems.
364 If unsure, say Y. 365 If unsure, say Y.
365 366
366# need this always enabled with GART_IOMMU for the VIA workaround 367# need this always enabled with GART_IOMMU for the VIA workaround
367config SWIOTLB 368config SWIOTLB
368 bool
369 depends on GART_IOMMU
370 default y
371
372config DUMMY_IOMMU
373 bool 369 bool
374 depends on !GART_IOMMU && !SWIOTLB
375 default y 370 default y
376 help 371 depends on GART_IOMMU
377 Don't use IOMMU code. This will cause problems when you have more than 4GB
378 of memory and any 32-bit devices. Don't turn on unless you know what you
379 are doing.
380 372
381config X86_MCE 373config X86_MCE
382 bool "Machine check support" if EMBEDDED 374 bool "Machine check support" if EMBEDDED
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 4601fdafb7cb..7df09edb36a1 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 dmi_scan.o 11 dmi_scan.o pci-dma.o pci-nommu.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -29,7 +29,7 @@ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
29obj-$(CONFIG_CPU_FREQ) += cpufreq/ 29obj-$(CONFIG_CPU_FREQ) += cpufreq/
30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
31obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o 31obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
32obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 32obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 33obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35 35
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index cab471cf3edb..2f5d8328e2b9 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -8,53 +8,259 @@
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h>
11 12
12/* Map a set of buffers described by scatterlist in streaming 13int iommu_merge __read_mostly = 0;
13 * mode for DMA. This is the scatter-gather version of the 14EXPORT_SYMBOL(iommu_merge);
14 * above pci_map_single interface. Here the scatter gather list 15
15 * elements are each tagged with the appropriate dma address 16dma_addr_t bad_dma_address __read_mostly;
16 * and length. They are obtained via sg_dma_{address,length}(SG). 17EXPORT_SYMBOL(bad_dma_address);
17 * 18
18 * NOTE: An implementation may be able to use a smaller number of 19/* This tells the BIO block layer to assume merging. Default to off
19 * DMA address/length pairs than there are SG table elements. 20 because we cannot guarantee merging later. */
20 * (for example via virtual mapping capabilities) 21int iommu_bio_merge __read_mostly = 0;
21 * The routine returns the number of addr/length pairs actually 22EXPORT_SYMBOL(iommu_bio_merge);
22 * used, at most nents. 23
23 * 24int iommu_sac_force __read_mostly = 0;
24 * Device ownership issues as mentioned above for pci_map_single are 25EXPORT_SYMBOL(iommu_sac_force);
25 * the same here. 26
27int no_iommu __read_mostly;
28#ifdef CONFIG_IOMMU_DEBUG
29int panic_on_overflow __read_mostly = 1;
30int force_iommu __read_mostly = 1;
31#else
32int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0;
34#endif
35
36/* Dummy device used for NULL arguments (normally ISA). Better would
37 be probably a smaller DMA mask, but this is bug-to-bug compatible
38 to i386. */
39struct device fallback_dev = {
40 .bus_id = "fallback device",
41 .coherent_dma_mask = 0xffffffff,
42 .dma_mask = &fallback_dev.coherent_dma_mask,
43};
44
45/* Allocate DMA memory on node near device */
46noinline static void *
47dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
48{
49 struct page *page;
50 int node;
51 if (dev->bus == &pci_bus_type)
52 node = pcibus_to_node(to_pci_dev(dev)->bus);
53 else
54 node = numa_node_id();
55 page = alloc_pages_node(node, gfp, order);
56 return page ? page_address(page) : NULL;
57}
58
59/*
60 * Allocate memory for a coherent mapping.
26 */ 61 */
27int dma_map_sg(struct device *hwdev, struct scatterlist *sg, 62void *
28 int nents, int direction) 63dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
64 gfp_t gfp)
29{ 65{
30 int i; 66 void *memory;
31 67 unsigned long dma_mask = 0;
32 BUG_ON(direction == DMA_NONE); 68 u64 bus;
33 for (i = 0; i < nents; i++ ) { 69
34 struct scatterlist *s = &sg[i]; 70 if (!dev)
35 BUG_ON(!s->page); 71 dev = &fallback_dev;
36 s->dma_address = virt_to_bus(page_address(s->page) +s->offset); 72 dma_mask = dev->coherent_dma_mask;
37 s->dma_length = s->length; 73 if (dma_mask == 0)
74 dma_mask = 0xffffffff;
75
76 /* Kludge to make it bug-to-bug compatible with i386. i386
77 uses the normal dma_mask for alloc_coherent. */
78 dma_mask &= *dev->dma_mask;
79
80 /* Why <=? Even when the mask is smaller than 4GB it is often
81 larger than 16MB and in this case we have a chance of
82 finding fitting memory in the next higher zone first. If
83 not retry with true GFP_DMA. -AK */
84 if (dma_mask <= 0xffffffff)
85 gfp |= GFP_DMA32;
86
87 again:
88 memory = dma_alloc_pages(dev, gfp, get_order(size));
89 if (memory == NULL)
90 return NULL;
91
92 {
93 int high, mmu;
94 bus = virt_to_bus(memory);
95 high = (bus + size) >= dma_mask;
96 mmu = high;
97 if (force_iommu && !(gfp & GFP_DMA))
98 mmu = 1;
99 else if (high) {
100 free_pages((unsigned long)memory,
101 get_order(size));
102
103 /* Don't use the 16MB ZONE_DMA unless absolutely
104 needed. It's better to use remapping first. */
105 if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) {
106 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
107 goto again;
108 }
109
110 if (dma_ops->alloc_coherent)
111 return dma_ops->alloc_coherent(dev, size,
112 dma_handle, gfp);
113 return NULL;
114 }
115
116 memset(memory, 0, size);
117 if (!mmu) {
118 *dma_handle = virt_to_bus(memory);
119 return memory;
120 }
121 }
122
123 if (dma_ops->alloc_coherent) {
124 free_pages((unsigned long)memory, get_order(size));
125 gfp &= ~(GFP_DMA|GFP_DMA32);
126 return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
127 }
128
129 if (dma_ops->map_simple) {
130 *dma_handle = dma_ops->map_simple(dev, memory,
131 size,
132 PCI_DMA_BIDIRECTIONAL);
133 if (*dma_handle != bad_dma_address)
134 return memory;
38 } 135 }
39 return nents;
40}
41 136
42EXPORT_SYMBOL(dma_map_sg); 137 if (panic_on_overflow)
138 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size);
139 free_pages((unsigned long)memory, get_order(size));
140 return NULL;
141}
142EXPORT_SYMBOL(dma_alloc_coherent);
43 143
44/* Unmap a set of streaming mode DMA translations. 144/*
45 * Again, cpu read rules concerning calls here are the same as for 145 * Unmap coherent memory.
46 * pci_unmap_single() above. 146 * The caller must ensure that the device has finished accessing the mapping.
47 */ 147 */
48void dma_unmap_sg(struct device *dev, struct scatterlist *sg, 148void dma_free_coherent(struct device *dev, size_t size,
49 int nents, int dir) 149 void *vaddr, dma_addr_t bus)
150{
151 if (dma_ops->unmap_single)
152 dma_ops->unmap_single(dev, bus, size, 0);
153 free_pages((unsigned long)vaddr, get_order(size));
154}
155EXPORT_SYMBOL(dma_free_coherent);
156
157int dma_supported(struct device *dev, u64 mask)
158{
159 if (dma_ops->dma_supported)
160 return dma_ops->dma_supported(dev, mask);
161
162 /* Copied from i386. Doesn't make much sense, because it will
163 only work for pci_alloc_coherent.
164 The caller just has to use GFP_DMA in this case. */
165 if (mask < 0x00ffffff)
166 return 0;
167
168 /* Tell the device to use SAC when IOMMU force is on. This
169 allows the driver to use cheaper accesses in some cases.
170
171 Problem with this is that if we overflow the IOMMU area and
172 return DAC as fallback address the device may not handle it
173 correctly.
174
175 As a special case some controllers have a 39bit address
176 mode that is as efficient as 32bit (aic79xx). Don't force
177 SAC for these. Assume all masks <= 40 bits are of this
178 type. Normally this doesn't make any difference, but gives
179 more gentle handling of IOMMU overflow. */
180 if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
181 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
182 return 0;
183 }
184
185 return 1;
186}
187EXPORT_SYMBOL(dma_supported);
188
189int dma_set_mask(struct device *dev, u64 mask)
50{ 190{
51 int i; 191 if (!dev->dma_mask || !dma_supported(dev, mask))
52 for (i = 0; i < nents; i++) { 192 return -EIO;
53 struct scatterlist *s = &sg[i]; 193 *dev->dma_mask = mask;
54 BUG_ON(s->page == NULL); 194 return 0;
55 BUG_ON(s->dma_address == 0);
56 dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
57 }
58} 195}
196EXPORT_SYMBOL(dma_set_mask);
197
198/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
199 [,forcesac][,fullflush][,nomerge][,biomerge]
200 size set size of iommu (in bytes)
201 noagp don't initialize the AGP driver and use full aperture.
202 off don't use the IOMMU
203 leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
204 memaper[=order] allocate an own aperture over RAM with size 32MB^order.
205 noforce don't force IOMMU usage. Default.
206 force Force IOMMU.
207 merge Do lazy merging. This may improve performance on some block devices.
208 Implies force (experimental)
209 biomerge Do merging at the BIO layer. This is more efficient than merge,
210 but should be only done with very big IOMMUs. Implies merge,force.
211 nomerge Don't do SG merging.
212 forcesac For SAC mode for masks <40bits (experimental)
213 fullflush Flush IOMMU on each allocation (default)
214 nofullflush Don't use IOMMU fullflush
215 allowed overwrite iommu off workarounds for specific chipsets.
216 soft Use software bounce buffering (default for Intel machines)
217 noaperture Don't touch the aperture for AGP.
218*/
219__init int iommu_setup(char *p)
220{
221 iommu_merge = 1;
59 222
60EXPORT_SYMBOL(dma_unmap_sg); 223 while (*p) {
224 if (!strncmp(p,"off",3))
225 no_iommu = 1;
226 /* gart_parse_options has more force support */
227 if (!strncmp(p,"force",5))
228 force_iommu = 1;
229 if (!strncmp(p,"noforce",7)) {
230 iommu_merge = 0;
231 force_iommu = 0;
232 }
233
234 if (!strncmp(p, "biomerge",8)) {
235 iommu_bio_merge = 4096;
236 iommu_merge = 1;
237 force_iommu = 1;
238 }
239 if (!strncmp(p, "panic",5))
240 panic_on_overflow = 1;
241 if (!strncmp(p, "nopanic",7))
242 panic_on_overflow = 0;
243 if (!strncmp(p, "merge",5)) {
244 iommu_merge = 1;
245 force_iommu = 1;
246 }
247 if (!strncmp(p, "nomerge",7))
248 iommu_merge = 0;
249 if (!strncmp(p, "forcesac",8))
250 iommu_sac_force = 1;
251
252#ifdef CONFIG_SWIOTLB
253 if (!strncmp(p, "soft",4))
254 swiotlb = 1;
255#endif
256
257#ifdef CONFIG_GART_IOMMU
258 gart_parse_options(p);
259#endif
260
261 p += strcspn(p, ",");
262 if (*p == ',')
263 ++p;
264 }
265 return 1;
266}
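
The rewritten dma_alloc_coherent() in pci-dma.c above implements a small
fallback ladder: allocate from ZONE_DMA32 when the effective mask is 4GB or
below, retry with the 16MB ZONE_DMA only if the first attempt lands above
the mask, and otherwise hand the request to the backend via
dma_ops->alloc_coherent()/map_simple(). The following compile-and-run C
sketch traces just that decision order; fake_alloc() and the fixed
addresses it returns are placeholders, and details such as the force_iommu
handling and the map_simple path are deliberately left out.

/* Illustrative decision-order sketch of the dma_alloc_coherent fallback
 * ladder in the patch; not kernel code.  Addresses are faked so the flow
 * can be traced. */
#include <stdio.h>

typedef unsigned long long u64;

enum zone { ZONE_NORMAL, ZONE_DMA32, ZONE_DMA, BACKEND };

/* Pretend allocator: returns a bus address "typical" for each zone. */
static u64 fake_alloc(enum zone z)
{
	switch (z) {
	case ZONE_DMA:    return 0x00800000ULL;        /* below 16MB */
	case ZONE_DMA32:  return 0x80000000ULL;        /* below 4GB  */
	default:          return 0x180000000ULL;       /* above 4GB  */
	}
}

static void alloc_coherent_flow(u64 dma_mask)
{
	enum zone z = (dma_mask <= 0xffffffffULL) ? ZONE_DMA32 : ZONE_NORMAL;
	u64 bus = fake_alloc(z);

	if (bus + 0x1000 >= dma_mask) {            /* "high": does not fit the mask */
		if (dma_mask < 0xffffffffULL && z != ZONE_DMA) {
			z = ZONE_DMA;              /* retry with the 16MB zone ... */
			bus = fake_alloc(z);
		}
		if (bus + 0x1000 >= dma_mask)
			z = BACKEND;               /* ... else punt to dma_ops->alloc_coherent */
	}
	printf("mask %#llx -> %s\n", dma_mask,
	       z == ZONE_DMA32 ? "ZONE_DMA32" :
	       z == ZONE_DMA   ? "ZONE_DMA"   :
	       z == BACKEND    ? "backend alloc_coherent" : "ZONE_NORMAL");
}

int main(void)
{
	alloc_coherent_flow(0xffffffffULL);    /* 32-bit device            */
	alloc_coherent_flow(0x00ffffffULL);    /* 24-bit (ISA-like) device */
	alloc_coherent_flow(~0ULL);            /* 64-bit device            */
	return 0;
}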
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 116e00f1bb7b..c37fc7726ba6 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -30,8 +30,8 @@
30#include <asm/proto.h> 30#include <asm/proto.h>
31#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33 33#include <asm/swiotlb.h>
34dma_addr_t bad_dma_address; 34#include <asm/dma.h>
35 35
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 36unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 37static unsigned long iommu_size; /* size of remapping area bytes */
@@ -39,18 +39,6 @@ static unsigned long iommu_pages; /* .. and in pages */
39 39
40u32 *iommu_gatt_base; /* Remapping table */ 40u32 *iommu_gatt_base; /* Remapping table */
41 41
42int no_iommu;
43static int no_agp;
44#ifdef CONFIG_IOMMU_DEBUG
45int panic_on_overflow = 1;
46int force_iommu = 1;
47#else
48int panic_on_overflow = 0;
49int force_iommu = 0;
50#endif
51int iommu_merge = 1;
52int iommu_sac_force = 0;
53
54/* If this is disabled the IOMMU will use an optimized flushing strategy 42/* If this is disabled the IOMMU will use an optimized flushing strategy
55 of only flushing when an mapping is reused. With it true the GART is flushed 43 of only flushing when an mapping is reused. With it true the GART is flushed
56 for every mapping. Problem is that doing the lazy flush seems to trigger 44 for every mapping. Problem is that doing the lazy flush seems to trigger
@@ -58,10 +46,6 @@ int iommu_sac_force = 0;
58 also seen with Qlogic at least). */ 46 also seen with Qlogic at least). */
59int iommu_fullflush = 1; 47int iommu_fullflush = 1;
60 48
61/* This tells the BIO block layer to assume merging. Default to off
62 because we cannot guarantee merging later. */
63int iommu_bio_merge = 0;
64
65#define MAX_NB 8 49#define MAX_NB 8
66 50
67/* Allocation bitmap for the remapping area */ 51/* Allocation bitmap for the remapping area */
@@ -102,16 +86,6 @@ AGPEXTERN __u32 *agp_gatt_table;
102 86
103static unsigned long next_bit; /* protected by iommu_bitmap_lock */ 87static unsigned long next_bit; /* protected by iommu_bitmap_lock */
104static int need_flush; /* global flush state. set for each gart wrap */ 88static int need_flush; /* global flush state. set for each gart wrap */
105static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
106 size_t size, int dir, int do_panic);
107
108/* Dummy device used for NULL arguments (normally ISA). Better would
109 be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */
110static struct device fallback_dev = {
111 .bus_id = "fallback device",
112 .coherent_dma_mask = 0xffffffff,
113 .dma_mask = &fallback_dev.coherent_dma_mask,
114};
115 89
116static unsigned long alloc_iommu(int size) 90static unsigned long alloc_iommu(int size)
117{ 91{
@@ -185,114 +159,7 @@ static void flush_gart(struct device *dev)
185 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 159 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
186} 160}
187 161
188/* Allocate DMA memory on node near device */
189noinline
190static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
191{
192 struct page *page;
193 int node;
194 if (dev->bus == &pci_bus_type)
195 node = pcibus_to_node(to_pci_dev(dev)->bus);
196 else
197 node = numa_node_id();
198 page = alloc_pages_node(node, gfp, order);
199 return page ? page_address(page) : NULL;
200}
201
202/*
203 * Allocate memory for a coherent mapping.
204 */
205void *
206dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
207 gfp_t gfp)
208{
209 void *memory;
210 unsigned long dma_mask = 0;
211 u64 bus;
212 162
213 if (!dev)
214 dev = &fallback_dev;
215 dma_mask = dev->coherent_dma_mask;
216 if (dma_mask == 0)
217 dma_mask = 0xffffffff;
218
219 /* Kludge to make it bug-to-bug compatible with i386. i386
220 uses the normal dma_mask for alloc_coherent. */
221 dma_mask &= *dev->dma_mask;
222
223 /* Why <=? Even when the mask is smaller than 4GB it is often larger
224 than 16MB and in this case we have a chance of finding fitting memory
225 in the next higher zone first. If not retry with true GFP_DMA. -AK */
226 if (dma_mask <= 0xffffffff)
227 gfp |= GFP_DMA32;
228
229 again:
230 memory = dma_alloc_pages(dev, gfp, get_order(size));
231 if (memory == NULL)
232 return NULL;
233
234 {
235 int high, mmu;
236 bus = virt_to_bus(memory);
237 high = (bus + size) >= dma_mask;
238 mmu = high;
239 if (force_iommu && !(gfp & GFP_DMA))
240 mmu = 1;
241 if (no_iommu || dma_mask < 0xffffffffUL) {
242 if (high) {
243 free_pages((unsigned long)memory,
244 get_order(size));
245
246 if (swiotlb) {
247 return
248 swiotlb_alloc_coherent(dev, size,
249 dma_handle,
250 gfp);
251 }
252
253 if (!(gfp & GFP_DMA)) {
254 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
255 goto again;
256 }
257 return NULL;
258 }
259 mmu = 0;
260 }
261 memset(memory, 0, size);
262 if (!mmu) {
263 *dma_handle = virt_to_bus(memory);
264 return memory;
265 }
266 }
267
268 *dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0);
269 if (*dma_handle == bad_dma_address)
270 goto error;
271 flush_gart(dev);
272 return memory;
273
274error:
275 if (panic_on_overflow)
276 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size);
277 free_pages((unsigned long)memory, get_order(size));
278 return NULL;
279}
280
281/*
282 * Unmap coherent memory.
283 * The caller must ensure that the device has finished accessing the mapping.
284 */
285void dma_free_coherent(struct device *dev, size_t size,
286 void *vaddr, dma_addr_t bus)
287{
288 if (swiotlb) {
289 swiotlb_free_coherent(dev, size, vaddr, bus);
290 return;
291 }
292
293 dma_unmap_single(dev, bus, size, 0);
294 free_pages((unsigned long)vaddr, get_order(size));
295}
296 163
297#ifdef CONFIG_IOMMU_LEAK 164#ifdef CONFIG_IOMMU_LEAK
298 165
@@ -326,7 +193,7 @@ void dump_leak(void)
326#define CLEAR_LEAK(x) 193#define CLEAR_LEAK(x)
327#endif 194#endif
328 195
329static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) 196static void iommu_full(struct device *dev, size_t size, int dir)
330{ 197{
331 /* 198 /*
332 * Ran out of IOMMU space for this operation. This is very bad. 199 * Ran out of IOMMU space for this operation. This is very bad.
@@ -342,11 +209,11 @@ static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
342 "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", 209 "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
343 size, dev->bus_id); 210 size, dev->bus_id);
344 211
345 if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) { 212 if (size > PAGE_SIZE*EMERGENCY_PAGES) {
346 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) 213 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
347 panic("PCI-DMA: Memory would be corrupted\n"); 214 panic("PCI-DMA: Memory would be corrupted\n");
348 if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) 215 if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
349 panic("PCI-DMA: Random memory would be DMAed\n"); 216 panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
350 } 217 }
351 218
352#ifdef CONFIG_IOMMU_LEAK 219#ifdef CONFIG_IOMMU_LEAK
@@ -385,8 +252,8 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
385/* Map a single continuous physical area into the IOMMU. 252/* Map a single continuous physical area into the IOMMU.
386 * Caller needs to check if the iommu is needed and flush. 253 * Caller needs to check if the iommu is needed and flush.
387 */ 254 */
388static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, 255static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
389 size_t size, int dir, int do_panic) 256 size_t size, int dir)
390{ 257{
391 unsigned long npages = to_pages(phys_mem, size); 258 unsigned long npages = to_pages(phys_mem, size);
392 unsigned long iommu_page = alloc_iommu(npages); 259 unsigned long iommu_page = alloc_iommu(npages);
@@ -396,7 +263,7 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
396 return phys_mem; 263 return phys_mem;
397 if (panic_on_overflow) 264 if (panic_on_overflow)
398 panic("dma_map_area overflow %lu bytes\n", size); 265 panic("dma_map_area overflow %lu bytes\n", size);
399 iommu_full(dev, size, dir, do_panic); 266 iommu_full(dev, size, dir);
400 return bad_dma_address; 267 return bad_dma_address;
401 } 268 }
402 269
@@ -408,15 +275,21 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
408 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); 275 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
409} 276}
410 277
278static dma_addr_t gart_map_simple(struct device *dev, char *buf,
279 size_t size, int dir)
280{
281 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
282 flush_gart(dev);
283 return map;
284}
285
411/* Map a single area into the IOMMU */ 286/* Map a single area into the IOMMU */
412dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) 287dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
413{ 288{
414 unsigned long phys_mem, bus; 289 unsigned long phys_mem, bus;
415 290
416 BUG_ON(dir == DMA_NONE); 291 BUG_ON(dir == DMA_NONE);
417 292
418 if (swiotlb)
419 return swiotlb_map_single(dev,addr,size,dir);
420 if (!dev) 293 if (!dev)
421 dev = &fallback_dev; 294 dev = &fallback_dev;
422 295
@@ -424,10 +297,24 @@ dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
424 if (!need_iommu(dev, phys_mem, size)) 297 if (!need_iommu(dev, phys_mem, size))
425 return phys_mem; 298 return phys_mem;
426 299
427 bus = dma_map_area(dev, phys_mem, size, dir, 1); 300 bus = gart_map_simple(dev, addr, size, dir);
428 flush_gart(dev);
429 return bus; 301 return bus;
430} 302}
303
304/*
305 * Wrapper for pci_unmap_single working with scatterlists.
306 */
307void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
308{
309 int i;
310
311 for (i = 0; i < nents; i++) {
312 struct scatterlist *s = &sg[i];
313 if (!s->dma_length || !s->length)
314 break;
315 dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
316 }
317}
431 318
432/* Fallback for dma_map_sg in case of overflow */ 319/* Fallback for dma_map_sg in case of overflow */
433static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, 320static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
@@ -443,10 +330,10 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
443 struct scatterlist *s = &sg[i]; 330 struct scatterlist *s = &sg[i];
444 unsigned long addr = page_to_phys(s->page) + s->offset; 331 unsigned long addr = page_to_phys(s->page) + s->offset;
445 if (nonforced_iommu(dev, addr, s->length)) { 332 if (nonforced_iommu(dev, addr, s->length)) {
446 addr = dma_map_area(dev, addr, s->length, dir, 0); 333 addr = dma_map_area(dev, addr, s->length, dir);
447 if (addr == bad_dma_address) { 334 if (addr == bad_dma_address) {
448 if (i > 0) 335 if (i > 0)
449 dma_unmap_sg(dev, sg, i, dir); 336 gart_unmap_sg(dev, sg, i, dir);
450 nents = 0; 337 nents = 0;
451 sg[0].dma_length = 0; 338 sg[0].dma_length = 0;
452 break; 339 break;
@@ -515,7 +402,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
515 * DMA map all entries in a scatterlist. 402 * DMA map all entries in a scatterlist.
516 * Merge chunks that have page aligned sizes into a continuous mapping. 403 * Merge chunks that have page aligned sizes into a continuous mapping.
517 */ 404 */
518int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 405int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
519{ 406{
520 int i; 407 int i;
521 int out; 408 int out;
@@ -527,8 +414,6 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
527 if (nents == 0) 414 if (nents == 0)
528 return 0; 415 return 0;
529 416
530 if (swiotlb)
531 return swiotlb_map_sg(dev,sg,nents,dir);
532 if (!dev) 417 if (!dev)
533 dev = &fallback_dev; 418 dev = &fallback_dev;
534 419
@@ -571,13 +456,13 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
571 456
572error: 457error:
573 flush_gart(NULL); 458 flush_gart(NULL);
574 dma_unmap_sg(dev, sg, nents, dir); 459 gart_unmap_sg(dev, sg, nents, dir);
575 /* When it was forced try again unforced */ 460 /* When it was forced try again unforced */
576 if (force_iommu) 461 if (force_iommu)
577 return dma_map_sg_nonforce(dev, sg, nents, dir); 462 return dma_map_sg_nonforce(dev, sg, nents, dir);
578 if (panic_on_overflow) 463 if (panic_on_overflow)
579 panic("dma_map_sg: overflow on %lu pages\n", pages); 464 panic("dma_map_sg: overflow on %lu pages\n", pages);
580 iommu_full(dev, pages << PAGE_SHIFT, dir, 0); 465 iommu_full(dev, pages << PAGE_SHIFT, dir);
581 for (i = 0; i < nents; i++) 466 for (i = 0; i < nents; i++)
582 sg[i].dma_address = bad_dma_address; 467 sg[i].dma_address = bad_dma_address;
583 return 0; 468 return 0;
@@ -586,18 +471,13 @@ error:
586/* 471/*
587 * Free a DMA mapping. 472 * Free a DMA mapping.
588 */ 473 */
589void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, 474void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
590 size_t size, int direction) 475 size_t size, int direction)
591{ 476{
592 unsigned long iommu_page; 477 unsigned long iommu_page;
593 int npages; 478 int npages;
594 int i; 479 int i;
595 480
596 if (swiotlb) {
597 swiotlb_unmap_single(dev,dma_addr,size,direction);
598 return;
599 }
600
601 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || 481 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
602 dma_addr >= iommu_bus_base + iommu_size) 482 dma_addr >= iommu_bus_base + iommu_size)
603 return; 483 return;
@@ -610,68 +490,7 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
610 free_iommu(iommu_page, npages); 490 free_iommu(iommu_page, npages);
611} 491}
612 492
613/* 493static int no_agp;
614 * Wrapper for pci_unmap_single working with scatterlists.
615 */
616void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
617{
618 int i;
619 if (swiotlb) {
620 swiotlb_unmap_sg(dev,sg,nents,dir);
621 return;
622 }
623 for (i = 0; i < nents; i++) {
624 struct scatterlist *s = &sg[i];
625 if (!s->dma_length || !s->length)
626 break;
627 dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
628 }
629}
630
631int dma_supported(struct device *dev, u64 mask)
632{
633 /* Copied from i386. Doesn't make much sense, because it will
634 only work for pci_alloc_coherent.
635 The caller just has to use GFP_DMA in this case. */
636 if (mask < 0x00ffffff)
637 return 0;
638
639 /* Tell the device to use SAC when IOMMU force is on.
640 This allows the driver to use cheaper accesses in some cases.
641
642 Problem with this is that if we overflow the IOMMU area
643 and return DAC as fallback address the device may not handle it correctly.
644
645 As a special case some controllers have a 39bit address mode
646 that is as efficient as 32bit (aic79xx). Don't force SAC for these.
647 Assume all masks <= 40 bits are of this type. Normally this doesn't
648 make any difference, but gives more gentle handling of IOMMU overflow. */
649 if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
650 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
651 return 0;
652 }
653
654 return 1;
655}
656
657int dma_get_cache_alignment(void)
658{
659 return boot_cpu_data.x86_clflush_size;
660}
661
662EXPORT_SYMBOL(dma_unmap_sg);
663EXPORT_SYMBOL(dma_map_sg);
664EXPORT_SYMBOL(dma_map_single);
665EXPORT_SYMBOL(dma_unmap_single);
666EXPORT_SYMBOL(dma_supported);
667EXPORT_SYMBOL(no_iommu);
668EXPORT_SYMBOL(force_iommu);
669EXPORT_SYMBOL(bad_dma_address);
670EXPORT_SYMBOL(iommu_bio_merge);
671EXPORT_SYMBOL(iommu_sac_force);
672EXPORT_SYMBOL(dma_get_cache_alignment);
673EXPORT_SYMBOL(dma_alloc_coherent);
674EXPORT_SYMBOL(dma_free_coherent);
675 494
676static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 495static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
677{ 496{
@@ -778,6 +597,21 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
778 597
779extern int agp_amd64_init(void); 598extern int agp_amd64_init(void);
780 599
600static struct dma_mapping_ops gart_dma_ops = {
601 .mapping_error = NULL,
602 .map_single = gart_map_single,
603 .map_simple = gart_map_simple,
604 .unmap_single = gart_unmap_single,
605 .sync_single_for_cpu = NULL,
606 .sync_single_for_device = NULL,
607 .sync_single_range_for_cpu = NULL,
608 .sync_single_range_for_device = NULL,
609 .sync_sg_for_cpu = NULL,
610 .sync_sg_for_device = NULL,
611 .map_sg = gart_map_sg,
612 .unmap_sg = gart_unmap_sg,
613};
614
781static int __init pci_iommu_init(void) 615static int __init pci_iommu_init(void)
782{ 616{
783 struct agp_kern_info info; 617 struct agp_kern_info info;
@@ -799,16 +633,15 @@ static int __init pci_iommu_init(void)
799 633
800 if (swiotlb) { 634 if (swiotlb) {
801 no_iommu = 1; 635 no_iommu = 1;
802 printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
803 return -1; 636 return -1;
804 } 637 }
805 638
806 if (no_iommu || 639 if (no_iommu ||
807 (!force_iommu && (end_pfn-1) < 0xffffffff>>PAGE_SHIFT) || 640 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
808 !iommu_aperture || 641 !iommu_aperture ||
809 (no_agp && init_k8_gatt(&info) < 0)) { 642 (no_agp && init_k8_gatt(&info) < 0)) {
810 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
811 no_iommu = 1; 643 no_iommu = 1;
644 no_iommu_init();
812 return -1; 645 return -1;
813 } 646 }
814 647
@@ -885,100 +718,50 @@ static int __init pci_iommu_init(void)
885 718
886 flush_gart(NULL); 719 flush_gart(NULL);
887 720
721 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
722 dma_ops = &gart_dma_ops;
723
888 return 0; 724 return 0;
889} 725}
890 726
891/* Must execute after PCI subsystem */ 727/* Must execute after PCI subsystem */
892fs_initcall(pci_iommu_init); 728fs_initcall(pci_iommu_init);
893 729
894/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] 730void gart_parse_options(char *p)
895 [,forcesac][,fullflush][,nomerge][,biomerge] 731{
896 size set size of iommu (in bytes) 732 int arg;
897 noagp don't initialize the AGP driver and use full aperture. 733
898 off don't use the IOMMU
899 leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
900 memaper[=order] allocate an own aperture over RAM with size 32MB^order.
901 noforce don't force IOMMU usage. Default.
902 force Force IOMMU.
903 merge Do lazy merging. This may improve performance on some block devices.
904 Implies force (experimental)
905 biomerge Do merging at the BIO layer. This is more efficient than merge,
906 but should be only done with very big IOMMUs. Implies merge,force.
907 nomerge Don't do SG merging.
908 forcesac For SAC mode for masks <40bits (experimental)
909 fullflush Flush IOMMU on each allocation (default)
910 nofullflush Don't use IOMMU fullflush
911 allowed overwrite iommu off workarounds for specific chipsets.
912 soft Use software bounce buffering (default for Intel machines)
913 noaperture Don't touch the aperture for AGP.
914*/
915__init int iommu_setup(char *p)
916{
917 int arg;
918
919 while (*p) {
920 if (!strncmp(p,"noagp",5))
921 no_agp = 1;
922 if (!strncmp(p,"off",3))
923 no_iommu = 1;
924 if (!strncmp(p,"force",5)) {
925 force_iommu = 1;
926 iommu_aperture_allowed = 1;
927 }
928 if (!strncmp(p,"allowed",7))
929 iommu_aperture_allowed = 1;
930 if (!strncmp(p,"noforce",7)) {
931 iommu_merge = 0;
932 force_iommu = 0;
933 }
934 if (!strncmp(p, "memaper", 7)) {
935 fallback_aper_force = 1;
936 p += 7;
937 if (*p == '=') {
938 ++p;
939 if (get_option(&p, &arg))
940 fallback_aper_order = arg;
941 }
942 }
943 if (!strncmp(p, "biomerge",8)) {
944 iommu_bio_merge = 4096;
945 iommu_merge = 1;
946 force_iommu = 1;
947 }
948 if (!strncmp(p, "panic",5))
949 panic_on_overflow = 1;
950 if (!strncmp(p, "nopanic",7))
951 panic_on_overflow = 0;
952 if (!strncmp(p, "merge",5)) {
953 iommu_merge = 1;
954 force_iommu = 1;
955 }
956 if (!strncmp(p, "nomerge",7))
957 iommu_merge = 0;
958 if (!strncmp(p, "forcesac",8))
959 iommu_sac_force = 1;
960 if (!strncmp(p, "fullflush",8))
961 iommu_fullflush = 1;
962 if (!strncmp(p, "nofullflush",11))
963 iommu_fullflush = 0;
964 if (!strncmp(p, "soft",4))
965 swiotlb = 1;
966 if (!strncmp(p, "noaperture",10))
967 fix_aperture = 0;
968#ifdef CONFIG_IOMMU_LEAK 734#ifdef CONFIG_IOMMU_LEAK
969 if (!strncmp(p,"leak",4)) { 735 if (!strncmp(p,"leak",4)) {
970 leak_trace = 1; 736 leak_trace = 1;
971 p += 4; 737 p += 4;
972 if (*p == '=') ++p; 738 if (*p == '=') ++p;
973 if (isdigit(*p) && get_option(&p, &arg)) 739 if (isdigit(*p) && get_option(&p, &arg))
974 iommu_leak_pages = arg; 740 iommu_leak_pages = arg;
975 } else 741 }
976#endif 742#endif
977 if (isdigit(*p) && get_option(&p, &arg)) 743 if (isdigit(*p) && get_option(&p, &arg))
978 iommu_size = arg; 744 iommu_size = arg;
979 p += strcspn(p, ","); 745 if (!strncmp(p, "fullflush",8))
980 if (*p == ',') 746 iommu_fullflush = 1;
981 ++p; 747 if (!strncmp(p, "nofullflush",11))
982 } 748 iommu_fullflush = 0;
983 return 1; 749 if (!strncmp(p,"noagp",5))
984} 750 no_agp = 1;
751 if (!strncmp(p, "noaperture",10))
752 fix_aperture = 0;
753 /* duplicated from pci-dma.c */
754 if (!strncmp(p,"force",5))
755 iommu_aperture_allowed = 1;
756 if (!strncmp(p,"allowed",7))
757 iommu_aperture_allowed = 1;
758 if (!strncmp(p, "memaper", 7)) {
759 fallback_aper_force = 1;
760 p += 7;
761 if (*p == '=') {
762 ++p;
763 if (get_option(&p, &arg))
764 fallback_aper_order = arg;
765 }
766 }
767}
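
Both iommu_setup() in pci-dma.c and gart_parse_options() above walk the
iommu= string the same way: strncmp() prefix matches per option,
get_option() for numeric values, then strcspn()/',' to hop to the next
token. A standalone sketch of that walking pattern, with a made-up option
set and strtol() standing in for get_option():

/* Standalone sketch of the strncmp()/strcspn() option walking used by
 * iommu_setup()/gart_parse_options(); the option names here are made up. */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

static void parse_opts(const char *arg)
{
	const char *p = arg;

	while (*p) {
		if (!strncmp(p, "off", 3))
			printf("  -> disable\n");
		if (!strncmp(p, "force", 5))
			printf("  -> force on\n");
		if (!strncmp(p, "size=", 5))
			printf("  -> size %ld\n", strtol(p + 5, NULL, 0));

		p += strcspn(p, ",");   /* jump to the next ',' (or the end) */
		if (*p == ',')
			++p;            /* and step past it */
	}
}

int main(void)
{
	printf("parsing \"force,size=65536\"\n");
	parse_opts("force,size=65536");
	return 0;
}

One property of this style worth keeping in mind when reading the parser is
that prefix matching means a longer option sharing a prefix (e.g. forcesac)
also satisfies the shorter match (force).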
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 5a981dca87ff..3209adac3272 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -6,89 +6,93 @@
6#include <linux/string.h> 6#include <linux/string.h>
7#include <asm/proto.h> 7#include <asm/proto.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9#include <asm/dma.h>
9 10
10int iommu_merge = 0; 11static int
11EXPORT_SYMBOL(iommu_merge); 12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
12
13dma_addr_t bad_dma_address;
14EXPORT_SYMBOL(bad_dma_address);
15
16int iommu_bio_merge = 0;
17EXPORT_SYMBOL(iommu_bio_merge);
18
19int iommu_sac_force = 0;
20EXPORT_SYMBOL(iommu_sac_force);
21
22/*
23 * Dummy IO MMU functions
24 */
25
26void *dma_alloc_coherent(struct device *hwdev, size_t size,
27 dma_addr_t *dma_handle, gfp_t gfp)
28{ 13{
29 void *ret; 14 if (hwdev && bus + size > *hwdev->dma_mask) {
30 u64 mask; 15 printk(KERN_ERR
31 int order = get_order(size); 16 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
32 17 name, (long long)bus, size, (long long)*hwdev->dma_mask);
33 if (hwdev) 18 return 0;
34 mask = hwdev->coherent_dma_mask & *hwdev->dma_mask;
35 else
36 mask = 0xffffffff;
37 for (;;) {
38 ret = (void *)__get_free_pages(gfp, order);
39 if (ret == NULL)
40 return NULL;
41 *dma_handle = virt_to_bus(ret);
42 if ((*dma_handle & ~mask) == 0)
43 break;
44 free_pages((unsigned long)ret, order);
45 if (gfp & GFP_DMA)
46 return NULL;
47 gfp |= GFP_DMA;
48 } 19 }
20 return 1;
21}
49 22
50 memset(ret, 0, size); 23static dma_addr_t
51 return ret; 24nommu_map_single(struct device *hwdev, void *ptr, size_t size,
25 int direction)
26{
27 dma_addr_t bus = virt_to_bus(ptr);
28 if (!check_addr("map_single", hwdev, bus, size))
29 return bad_dma_address;
30 return bus;
52} 31}
53EXPORT_SYMBOL(dma_alloc_coherent);
54 32
55void dma_free_coherent(struct device *hwdev, size_t size, 33void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
56 void *vaddr, dma_addr_t dma_handle) 34 int direction)
57{ 35{
58 free_pages((unsigned long)vaddr, get_order(size));
59} 36}
60EXPORT_SYMBOL(dma_free_coherent);
61 37
62int dma_supported(struct device *hwdev, u64 mask) 38/* Map a set of buffers described by scatterlist in streaming
39 * mode for DMA. This is the scatter-gather version of the
40 * above pci_map_single interface. Here the scatter gather list
41 * elements are each tagged with the appropriate dma address
42 * and length. They are obtained via sg_dma_{address,length}(SG).
43 *
44 * NOTE: An implementation may be able to use a smaller number of
45 * DMA address/length pairs than there are SG table elements.
46 * (for example via virtual mapping capabilities)
47 * The routine returns the number of addr/length pairs actually
48 * used, at most nents.
49 *
50 * Device ownership issues as mentioned above for pci_map_single are
51 * the same here.
52 */
53int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
54 int nents, int direction)
63{ 55{
64 /* 56 int i;
65 * we fall back to GFP_DMA when the mask isn't all 1s,
66 * so we can't guarantee allocations that must be
67 * within a tighter range than GFP_DMA..
68 * RED-PEN this won't work for pci_map_single. Caller has to
69 * use GFP_DMA in the first place.
70 */
71 if (mask < 0x00ffffff)
72 return 0;
73 57
74 return 1; 58 BUG_ON(direction == DMA_NONE);
75} 59 for (i = 0; i < nents; i++ ) {
76EXPORT_SYMBOL(dma_supported); 60 struct scatterlist *s = &sg[i];
61 BUG_ON(!s->page);
62 s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
63 if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
64 return 0;
65 s->dma_length = s->length;
66 }
67 return nents;
68}
77 69
78int dma_get_cache_alignment(void) 70/* Unmap a set of streaming mode DMA translations.
71 * Again, cpu read rules concerning calls here are the same as for
72 * pci_unmap_single() above.
73 */
74void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
75 int nents, int dir)
79{ 76{
80 return boot_cpu_data.x86_clflush_size;
81} 77}
82EXPORT_SYMBOL(dma_get_cache_alignment);
83 78
84static int __init check_ram(void) 79struct dma_mapping_ops nommu_dma_ops = {
85{ 80 .map_single = nommu_map_single,
86 if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { 81 .unmap_single = nommu_unmap_single,
87 printk( 82 .map_sg = nommu_map_sg,
88 KERN_ERR "WARNING more than 4GB of memory but IOMMU not compiled in.\n" 83 .unmap_sg = nommu_unmap_sg,
89 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 84 .is_phys = 1,
90 } 85};
91 return 0;
92}
93__initcall(check_ram);
94 86
87void __init no_iommu_init(void)
88{
89 if (dma_ops)
90 return;
91 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
92 dma_ops = &nommu_dma_ops;
93 if (end_pfn > MAX_DMA32_PFN) {
94 printk(KERN_ERR
95 "WARNING more than 4GB of memory but IOMMU disabled.\n"
96 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
97 }
98}
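
The nommu backend above reduces mapping to a single sanity test,
check_addr(): the buffer [bus, bus + size) must fit under the device's DMA
mask, otherwise bad_dma_address is returned. A minimal standalone version
of that test, with struct device shrunk to just a mask for illustration:

/* Minimal standalone version of the check_addr() test in pci-nommu.c:
 * the buffer [bus, bus + size) must fit under the device's DMA mask.
 * The one-field "device" struct is an illustration, not the kernel's. */
#include <stdio.h>
#include <stddef.h>

typedef unsigned long long u64;

struct toy_device { u64 dma_mask; };

static int check_addr(const char *name, const struct toy_device *dev,
		      u64 bus, size_t size)
{
	if (dev && bus + size > dev->dma_mask) {
		fprintf(stderr, "nommu_%s: overflow %#llx+%zu of device mask %#llx\n",
			name, bus, size, dev->dma_mask);
		return 0;
	}
	return 1;
}

int main(void)
{
	struct toy_device dev32 = { .dma_mask = 0xffffffffULL };

	/* fits: well below 4GB */
	printf("low buffer:  %s\n",
	       check_addr("map_single", &dev32, 0x1000000ULL, 4096) ? "ok" : "rejected");
	/* fails: crosses the 4GB mask */
	printf("high buffer: %s\n",
	       check_addr("map_single", &dev32, 0xffffff00ULL, 4096) ? "ok" : "rejected");
	return 0;
}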
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
new file mode 100644
index 000000000000..3569a25ad7fb
--- /dev/null
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -0,0 +1,42 @@
1/* Glue code to lib/swiotlb.c */
2
3#include <linux/pci.h>
4#include <linux/cache.h>
5#include <linux/module.h>
6#include <asm/dma-mapping.h>
7#include <asm/proto.h>
8#include <asm/swiotlb.h>
9#include <asm/dma.h>
10
11int swiotlb __read_mostly;
12EXPORT_SYMBOL(swiotlb);
13
14struct dma_mapping_ops swiotlb_dma_ops = {
15 .mapping_error = swiotlb_dma_mapping_error,
16 .alloc_coherent = swiotlb_alloc_coherent,
17 .free_coherent = swiotlb_free_coherent,
18 .map_single = swiotlb_map_single,
19 .unmap_single = swiotlb_unmap_single,
20 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
21 .sync_single_for_device = swiotlb_sync_single_for_device,
22 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
23 .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
24 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
25 .sync_sg_for_device = swiotlb_sync_sg_for_device,
26 .map_sg = swiotlb_map_sg,
27 .unmap_sg = swiotlb_unmap_sg,
28 .dma_supported = NULL,
29};
30
31void pci_swiotlb_init(void)
32{
33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
34 if (!iommu_aperture && !no_iommu &&
35 (end_pfn > MAX_DMA32_PFN || force_iommu))
36 swiotlb = 1;
37 if (swiotlb) {
38 swiotlb_init();
39 printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
40 dma_ops = &swiotlb_dma_ops;
41 }
42}
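
pci_swiotlb_init() above only turns bounce buffering on when it can
actually be needed: no GART aperture was found, iommu=off was not given,
and either memory reaches past the 32-bit DMA limit
(end_pfn > MAX_DMA32_PFN) or IOMMU use is being forced. The predicate in
isolation, as a small sketch with the globals faked as parameters:

/* Sketch of the pci_swiotlb_init() enablement predicate; the inputs are
 * the same globals the real code consults, faked here as plain ints
 * (mem_above_4g stands for end_pfn > MAX_DMA32_PFN). */
#include <stdio.h>

static int should_enable_swiotlb(int iommu_aperture, int no_iommu,
				 int mem_above_4g, int force_iommu)
{
	/* don't initialize swiotlb if a GART aperture exists or iommu=off was given */
	return !iommu_aperture && !no_iommu && (mem_above_4g || force_iommu);
}

int main(void)
{
	/* 8GB box, no GART aperture, default options -> swiotlb */
	printf("%d\n", should_enable_swiotlb(0, 0, 1, 0));
	/* 2GB box, default options -> no bounce buffering needed */
	printf("%d\n", should_enable_swiotlb(0, 0, 0, 0));
	/* GART aperture present -> the GART backend will handle it instead */
	printf("%d\n", should_enable_swiotlb(1, 0, 1, 0));
	return 0;
}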
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 6810690d6045..e7a4d2cd3968 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -45,6 +45,7 @@
45#include <linux/kexec.h> 45#include <linux/kexec.h>
46#include <linux/cpufreq.h> 46#include <linux/cpufreq.h>
47#include <linux/dmi.h> 47#include <linux/dmi.h>
48#include <linux/dma-mapping.h>
48 49
49#include <asm/mtrr.h> 50#include <asm/mtrr.h>
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
@@ -63,7 +64,9 @@
63#include <asm/setup.h> 64#include <asm/setup.h>
64#include <asm/mach_apic.h> 65#include <asm/mach_apic.h>
65#include <asm/numa.h> 66#include <asm/numa.h>
67#include <asm/swiotlb.h>
66#include <asm/sections.h> 68#include <asm/sections.h>
69#include <asm/gart-mapping.h>
67 70
68/* 71/*
69 * Machine setup.. 72 * Machine setup..
@@ -88,11 +91,6 @@ int bootloader_type;
88 91
89unsigned long saved_video_mode; 92unsigned long saved_video_mode;
90 93
91#ifdef CONFIG_SWIOTLB
92int swiotlb;
93EXPORT_SYMBOL(swiotlb);
94#endif
95
96/* 94/*
97 * Setup options 95 * Setup options
98 */ 96 */
@@ -389,11 +387,9 @@ static __init void parse_cmdline_early (char ** cmdline_p)
389 numa_setup(from+5); 387 numa_setup(from+5);
390#endif 388#endif
391 389
392#ifdef CONFIG_GART_IOMMU
393 if (!memcmp(from,"iommu=",6)) { 390 if (!memcmp(from,"iommu=",6)) {
394 iommu_setup(from+6); 391 iommu_setup(from+6);
395 } 392 }
396#endif
397 393
398 if (!memcmp(from,"oops=panic", 10)) 394 if (!memcmp(from,"oops=panic", 10))
399 panic_on_oops = 1; 395 panic_on_oops = 1;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 5337ca0ea7b9..e93867850a4f 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -23,6 +23,7 @@
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/dma-mapping.h>
26 27
27#include <asm/processor.h> 28#include <asm/processor.h>
28#include <asm/system.h> 29#include <asm/system.h>
@@ -38,11 +39,16 @@
38#include <asm/proto.h> 39#include <asm/proto.h>
39#include <asm/smp.h> 40#include <asm/smp.h>
40#include <asm/sections.h> 41#include <asm/sections.h>
42#include <asm/dma-mapping.h>
43#include <asm/swiotlb.h>
41 44
42#ifndef Dprintk 45#ifndef Dprintk
43#define Dprintk(x...) 46#define Dprintk(x...)
44#endif 47#endif
45 48
49struct dma_mapping_ops* dma_ops;
50EXPORT_SYMBOL(dma_ops);
51
46static unsigned long dma_reserve __initdata; 52static unsigned long dma_reserve __initdata;
47 53
48DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 54DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -423,12 +429,9 @@ void __init mem_init(void)
423 long codesize, reservedpages, datasize, initsize; 429 long codesize, reservedpages, datasize, initsize;
424 430
425#ifdef CONFIG_SWIOTLB 431#ifdef CONFIG_SWIOTLB
426 if (!iommu_aperture && 432 pci_swiotlb_init();
427 ((end_pfn-1) >= 0xffffffff>>PAGE_SHIFT || force_iommu))
428 swiotlb = 1;
429 if (swiotlb)
430 swiotlb_init();
431#endif 433#endif
434 no_iommu_init();
432 435
433 /* How many end-of-memory variables you have, grandma! */ 436 /* How many end-of-memory variables you have, grandma! */
434 max_low_pfn = end_pfn; 437 max_low_pfn = end_pfn;
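
Finally, the mm/init.c hunk above also establishes the registration order:
pci_swiotlb_init() gets first pick at mem_init() time, no_iommu_init()
installs the nommu ops only if dma_ops is still unset, and pci_iommu_init()
(an fs_initcall that runs after the PCI subsystem) can still switch to the
GART ops later. A compressed sketch of that install-a-default-then-override
sequence, with empty stand-in ops structs:

/* Sketch of the backend registration order set up by this patch:
 * swiotlb (if needed) -> nommu default -> GART override after PCI init.
 * The ops structs here are empty stand-ins; only the ordering matters. */
#include <stdio.h>

struct dma_mapping_ops { const char *name; };

static struct dma_mapping_ops swiotlb_dma_ops = { "swiotlb" };
static struct dma_mapping_ops nommu_dma_ops   = { "nommu"   };
static struct dma_mapping_ops gart_dma_ops    = { "gart"    };

static struct dma_mapping_ops *dma_ops;        /* the global selector */

static void pci_swiotlb_init(int need_bounce)  /* called from mem_init() */
{
	if (need_bounce)
		dma_ops = &swiotlb_dma_ops;
}

static void no_iommu_init(void)                /* called right after it */
{
	if (dma_ops)                           /* someone already claimed it */
		return;
	dma_ops = &nommu_dma_ops;
}

static void pci_iommu_init(int gart_usable)    /* fs_initcall, after PCI */
{
	if (gart_usable)
		dma_ops = &gart_dma_ops;
}

int main(void)
{
	pci_swiotlb_init(0);   /* e.g. a box where no bounce buffers are needed */
	no_iommu_init();
	pci_iommu_init(1);     /* a usable K8 GART aperture was found later     */
	printf("active DMA backend: %s\n", dma_ops->name);
	return 0;
}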