author     Muli Ben-Yehuda <mulix@mulix.org>        2006-01-11 16:44:42 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>    2006-01-11 22:04:55 -0500
commit     17a941d854a3f7b0bb916fdeee4c9ffdcc19d8d3
tree       b6b3b55318336adf769bf57141a01a9defbbb202  /arch
parent     8a6fdd3e912e0ce6f723431d66baf704bf8a1d26
[PATCH] x86_64: Use function pointers to call DMA mapping functions
AK: I hacked Muli's original patch a lot and there were a lot
of changes - all bugs are probably to blame on me now.
There were also some changes in the fallback behaviour
for swiotlb - in particular it no longer tries to use GFP_DMA.
Also, all DMA mapping operations now use the same core
dma_alloc_coherent code with proper fallbacks, plus various
other changes and cleanups.
Known problems: iommu=force swiotlb=force together breaks;
needs more testing.
This patch cleans up x86_64's DMA mapping dispatching code. Right now
we have three possible IOMMU types: AGP GART, swiotlb and nommu, and
in the future we will also have Xen's x86_64 swiotlb and other HW
IOMMUs for x86_64. In order to support all of them cleanly, this
patch:
- introduces a struct dma_mapping_ops with function pointers for each
of the DMA mapping operations of gart (AMD HW IOMMU), swiotlb
(software IOMMU) and nommu (no IOMMU).
- gets rid of:
      if (swiotlb)
              return swiotlb_xxx();
- PCI_DMA_BUS_IS_PHYS is now checked against the dma_ops being set.
This makes swiotlb faster by avoiding double copying in some cases.
Signed-Off-By: Muli Ben-Yehuda <mulix@mulix.org>
Signed-Off-By: Jon D. Mason <jdmason@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
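The dispatch pattern described above - a per-backend struct of function pointers plus a global dma_ops selected at init time - can be illustrated with a minimal, self-contained sketch. This is ordinary userspace C with simplified signatures and stub backends, not the kernel code itself; the real struct dma_mapping_ops (see pci-gart.c, pci-nommu.c and pci-swiotlb.c below) carries many more operations.

/*
 * Minimal sketch of the dispatch this patch introduces: a table of
 * function pointers per IOMMU backend, one global dma_ops pointer set
 * at init time, and thin wrappers replacing the old
 * "if (swiotlb) return swiotlb_xxx();" checks.  Simplified names and
 * stub bodies; only the shape of the dispatch is meaningful.
 */
#include <stdio.h>
#include <stddef.h>

typedef unsigned long dma_addr_t;

struct dma_mapping_ops {
	dma_addr_t (*map_single)(void *dev, void *ptr, size_t size, int dir);
	void       (*unmap_single)(void *dev, dma_addr_t addr, size_t size, int dir);
	int        is_phys;	/* no remapping: bus address == physical address */
};

/* nommu backend: identity "mapping" (stand-in for virt_to_bus()) */
static dma_addr_t nommu_map_single(void *dev, void *ptr, size_t size, int dir)
{
	(void)dev; (void)size; (void)dir;
	return (dma_addr_t)ptr;
}

static void nommu_unmap_single(void *dev, dma_addr_t addr, size_t size, int dir)
{
	(void)dev; (void)addr; (void)size; (void)dir;	/* nothing to undo */
}

static struct dma_mapping_ops nommu_dma_ops = {
	.map_single   = nommu_map_single,
	.unmap_single = nommu_unmap_single,
	.is_phys      = 1,
};

/* One backend is installed at init time (gart, swiotlb or nommu in the patch). */
static struct dma_mapping_ops *dma_ops;

/* Generic wrappers: callers no longer test "if (swiotlb)" themselves. */
static dma_addr_t dma_map_single(void *dev, void *ptr, size_t size, int dir)
{
	return dma_ops->map_single(dev, ptr, size, dir);
}

static void dma_unmap_single(void *dev, dma_addr_t addr, size_t size, int dir)
{
	if (dma_ops->unmap_single)
		dma_ops->unmap_single(dev, addr, size, dir);
}

int main(void)
{
	char buf[64];

	dma_ops = &nommu_dma_ops;	/* e.g. no_iommu_init() picking the fallback */

	dma_addr_t bus = dma_map_single(NULL, buf, sizeof(buf), 0);
	printf("mapped %p -> bus %#lx (is_phys=%d)\n",
	       (void *)buf, bus, dma_ops->is_phys);
	dma_unmap_single(NULL, bus, sizeof(buf), 0);
	return 0;
}

The is_phys flag mirrors the nommu backend in the patch setting .is_phys = 1, in line with the note above that PCI_DMA_BUS_IS_PHYS is now derived from the installed dma_ops rather than from a swiotlb check.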
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86_64/Kconfig               |  18
-rw-r--r--  arch/x86_64/kernel/Makefile       |   4
-rw-r--r--  arch/x86_64/kernel/pci-dma.c      | 286
-rw-r--r--  arch/x86_64/kernel/pci-gart.c     | 411
-rw-r--r--  arch/x86_64/kernel/pci-nommu.c    | 144
-rw-r--r--  arch/x86_64/kernel/pci-swiotlb.c  |  42
-rw-r--r--  arch/x86_64/kernel/setup.c        |  10
-rw-r--r--  arch/x86_64/mm/init.c             |  13
8 files changed, 477 insertions, 451 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 150e1ac0bfed..ee053e32a721 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -351,32 +351,24 @@ config HPET_EMULATE_RTC | |||
351 | depends on HPET_TIMER && RTC=y | 351 | depends on HPET_TIMER && RTC=y |
352 | 352 | ||
353 | config GART_IOMMU | 353 | config GART_IOMMU |
354 | bool "IOMMU support" | 354 | bool "K8 GART IOMMU support" |
355 | default y | 355 | default y |
356 | select SWIOTLB | ||
356 | depends on PCI | 357 | depends on PCI |
357 | help | 358 | help |
358 | Support the IOMMU. Needed to run systems with more than 3GB of memory | 359 | Support the IOMMU. Needed to run systems with more than 3GB of memory |
359 | properly with 32-bit PCI devices that do not support DAC (Double Address | 360 | properly with 32-bit PCI devices that do not support DAC (Double Address |
360 | Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. | 361 | Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. |
361 | Normally the kernel will take the right choice by itself. | 362 | Normally the kernel will take the right choice by itself. |
362 | This option includes a driver for the AMD Opteron/Athlon64 IOMMU | 363 | This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU |
363 | and a software emulation used on some other systems. | 364 | and a software emulation used on other systems. |
364 | If unsure, say Y. | 365 | If unsure, say Y. |
365 | 366 | ||
366 | # need this always enabled with GART_IOMMU for the VIA workaround | 367 | # need this always enabled with GART_IOMMU for the VIA workaround |
367 | config SWIOTLB | 368 | config SWIOTLB |
368 | bool | ||
369 | depends on GART_IOMMU | ||
370 | default y | ||
371 | |||
372 | config DUMMY_IOMMU | ||
373 | bool | 369 | bool |
374 | depends on !GART_IOMMU && !SWIOTLB | ||
375 | default y | 370 | default y |
376 | help | 371 | depends on GART_IOMMU |
377 | Don't use IOMMU code. This will cause problems when you have more than 4GB | ||
378 | of memory and any 32-bit devices. Don't turn on unless you know what you | ||
379 | are doing. | ||
380 | 372 | ||
381 | config X86_MCE | 373 | config X86_MCE |
382 | bool "Machine check support" if EMBEDDED | 374 | bool "Machine check support" if EMBEDDED |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 4601fdafb7cb..7df09edb36a1 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ |
9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ | 9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ |
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | dmi_scan.o | 11 | dmi_scan.o pci-dma.o pci-nommu.o |
12 | 12 | ||
13 | obj-$(CONFIG_X86_MCE) += mce.o | 13 | obj-$(CONFIG_X86_MCE) += mce.o |
14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
@@ -29,7 +29,7 @@ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o | |||
29 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 29 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
30 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 30 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
31 | obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | 31 | obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o |
32 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o | 32 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
33 | obj-$(CONFIG_KPROBES) += kprobes.o | 33 | obj-$(CONFIG_KPROBES) += kprobes.o |
34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | 34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o |
35 | 35 | ||
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index cab471cf3edb..2f5d8328e2b9 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -8,53 +8,259 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <asm/io.h> | 10 | #include <asm/io.h> |
11 | #include <asm/proto.h> | ||
11 | 12 | ||
12 | /* Map a set of buffers described by scatterlist in streaming | 13 | int iommu_merge __read_mostly = 0; |
13 | * mode for DMA. This is the scatter-gather version of the | 14 | EXPORT_SYMBOL(iommu_merge); |
14 | * above pci_map_single interface. Here the scatter gather list | 15 | |
15 | * elements are each tagged with the appropriate dma address | 16 | dma_addr_t bad_dma_address __read_mostly; |
16 | * and length. They are obtained via sg_dma_{address,length}(SG). | 17 | EXPORT_SYMBOL(bad_dma_address); |
17 | * | 18 | |
18 | * NOTE: An implementation may be able to use a smaller number of | 19 | /* This tells the BIO block layer to assume merging. Default to off |
19 | * DMA address/length pairs than there are SG table elements. | 20 | because we cannot guarantee merging later. */ |
20 | * (for example via virtual mapping capabilities) | 21 | int iommu_bio_merge __read_mostly = 0; |
21 | * The routine returns the number of addr/length pairs actually | 22 | EXPORT_SYMBOL(iommu_bio_merge); |
22 | * used, at most nents. | 23 | |
23 | * | 24 | int iommu_sac_force __read_mostly = 0; |
24 | * Device ownership issues as mentioned above for pci_map_single are | 25 | EXPORT_SYMBOL(iommu_sac_force); |
25 | * the same here. | 26 | |
27 | int no_iommu __read_mostly; | ||
28 | #ifdef CONFIG_IOMMU_DEBUG | ||
29 | int panic_on_overflow __read_mostly = 1; | ||
30 | int force_iommu __read_mostly = 1; | ||
31 | #else | ||
32 | int panic_on_overflow __read_mostly = 0; | ||
33 | int force_iommu __read_mostly= 0; | ||
34 | #endif | ||
35 | |||
36 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
37 | be probably a smaller DMA mask, but this is bug-to-bug compatible | ||
38 | to i386. */ | ||
39 | struct device fallback_dev = { | ||
40 | .bus_id = "fallback device", | ||
41 | .coherent_dma_mask = 0xffffffff, | ||
42 | .dma_mask = &fallback_dev.coherent_dma_mask, | ||
43 | }; | ||
44 | |||
45 | /* Allocate DMA memory on node near device */ | ||
46 | noinline static void * | ||
47 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
48 | { | ||
49 | struct page *page; | ||
50 | int node; | ||
51 | if (dev->bus == &pci_bus_type) | ||
52 | node = pcibus_to_node(to_pci_dev(dev)->bus); | ||
53 | else | ||
54 | node = numa_node_id(); | ||
55 | page = alloc_pages_node(node, gfp, order); | ||
56 | return page ? page_address(page) : NULL; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Allocate memory for a coherent mapping. | ||
26 | */ | 61 | */ |
27 | int dma_map_sg(struct device *hwdev, struct scatterlist *sg, | 62 | void * |
28 | int nents, int direction) | 63 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
64 | gfp_t gfp) | ||
29 | { | 65 | { |
30 | int i; | 66 | void *memory; |
31 | 67 | unsigned long dma_mask = 0; | |
32 | BUG_ON(direction == DMA_NONE); | 68 | u64 bus; |
33 | for (i = 0; i < nents; i++ ) { | 69 | |
34 | struct scatterlist *s = &sg[i]; | 70 | if (!dev) |
35 | BUG_ON(!s->page); | 71 | dev = &fallback_dev; |
36 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | 72 | dma_mask = dev->coherent_dma_mask; |
37 | s->dma_length = s->length; | 73 | if (dma_mask == 0) |
74 | dma_mask = 0xffffffff; | ||
75 | |||
76 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
77 | uses the normal dma_mask for alloc_coherent. */ | ||
78 | dma_mask &= *dev->dma_mask; | ||
79 | |||
80 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
81 | larger than 16MB and in this case we have a chance of | ||
82 | finding fitting memory in the next higher zone first. If | ||
83 | not retry with true GFP_DMA. -AK */ | ||
84 | if (dma_mask <= 0xffffffff) | ||
85 | gfp |= GFP_DMA32; | ||
86 | |||
87 | again: | ||
88 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
89 | if (memory == NULL) | ||
90 | return NULL; | ||
91 | |||
92 | { | ||
93 | int high, mmu; | ||
94 | bus = virt_to_bus(memory); | ||
95 | high = (bus + size) >= dma_mask; | ||
96 | mmu = high; | ||
97 | if (force_iommu && !(gfp & GFP_DMA)) | ||
98 | mmu = 1; | ||
99 | else if (high) { | ||
100 | free_pages((unsigned long)memory, | ||
101 | get_order(size)); | ||
102 | |||
103 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
104 | needed. It's better to use remapping first. */ | ||
105 | if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { | ||
106 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
107 | goto again; | ||
108 | } | ||
109 | |||
110 | if (dma_ops->alloc_coherent) | ||
111 | return dma_ops->alloc_coherent(dev, size, | ||
112 | dma_handle, gfp); | ||
113 | return NULL; | ||
114 | } | ||
115 | |||
116 | memset(memory, 0, size); | ||
117 | if (!mmu) { | ||
118 | *dma_handle = virt_to_bus(memory); | ||
119 | return memory; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | if (dma_ops->alloc_coherent) { | ||
124 | free_pages((unsigned long)memory, get_order(size)); | ||
125 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
126 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
127 | } | ||
128 | |||
129 | if (dma_ops->map_simple) { | ||
130 | *dma_handle = dma_ops->map_simple(dev, memory, | ||
131 | size, | ||
132 | PCI_DMA_BIDIRECTIONAL); | ||
133 | if (*dma_handle != bad_dma_address) | ||
134 | return memory; | ||
38 | } | 135 | } |
39 | return nents; | ||
40 | } | ||
41 | 136 | ||
42 | EXPORT_SYMBOL(dma_map_sg); | 137 | if (panic_on_overflow) |
138 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); | ||
139 | free_pages((unsigned long)memory, get_order(size)); | ||
140 | return NULL; | ||
141 | } | ||
142 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
43 | 143 | ||
44 | /* Unmap a set of streaming mode DMA translations. | 144 | /* |
45 | * Again, cpu read rules concerning calls here are the same as for | 145 | * Unmap coherent memory. |
46 | * pci_unmap_single() above. | 146 | * The caller must ensure that the device has finished accessing the mapping. |
47 | */ | 147 | */ |
48 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, | 148 | void dma_free_coherent(struct device *dev, size_t size, |
49 | int nents, int dir) | 149 | void *vaddr, dma_addr_t bus) |
150 | { | ||
151 | if (dma_ops->unmap_single) | ||
152 | dma_ops->unmap_single(dev, bus, size, 0); | ||
153 | free_pages((unsigned long)vaddr, get_order(size)); | ||
154 | } | ||
155 | EXPORT_SYMBOL(dma_free_coherent); | ||
156 | |||
157 | int dma_supported(struct device *dev, u64 mask) | ||
158 | { | ||
159 | if (dma_ops->dma_supported) | ||
160 | return dma_ops->dma_supported(dev, mask); | ||
161 | |||
162 | /* Copied from i386. Doesn't make much sense, because it will | ||
163 | only work for pci_alloc_coherent. | ||
164 | The caller just has to use GFP_DMA in this case. */ | ||
165 | if (mask < 0x00ffffff) | ||
166 | return 0; | ||
167 | |||
168 | /* Tell the device to use SAC when IOMMU force is on. This | ||
169 | allows the driver to use cheaper accesses in some cases. | ||
170 | |||
171 | Problem with this is that if we overflow the IOMMU area and | ||
172 | return DAC as fallback address the device may not handle it | ||
173 | correctly. | ||
174 | |||
175 | As a special case some controllers have a 39bit address | ||
176 | mode that is as efficient as 32bit (aic79xx). Don't force | ||
177 | SAC for these. Assume all masks <= 40 bits are of this | ||
178 | type. Normally this doesn't make any difference, but gives | ||
179 | more gentle handling of IOMMU overflow. */ | ||
180 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
181 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | return 1; | ||
186 | } | ||
187 | EXPORT_SYMBOL(dma_supported); | ||
188 | |||
189 | int dma_set_mask(struct device *dev, u64 mask) | ||
50 | { | 190 | { |
51 | int i; | 191 | if (!dev->dma_mask || !dma_supported(dev, mask)) |
52 | for (i = 0; i < nents; i++) { | 192 | return -EIO; |
53 | struct scatterlist *s = &sg[i]; | 193 | *dev->dma_mask = mask; |
54 | BUG_ON(s->page == NULL); | 194 | return 0; |
55 | BUG_ON(s->dma_address == 0); | ||
56 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
57 | } | ||
58 | } | 195 | } |
196 | EXPORT_SYMBOL(dma_set_mask); | ||
197 | |||
198 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | ||
199 | [,forcesac][,fullflush][,nomerge][,biomerge] | ||
200 | size set size of iommu (in bytes) | ||
201 | noagp don't initialize the AGP driver and use full aperture. | ||
202 | off don't use the IOMMU | ||
203 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
204 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
205 | noforce don't force IOMMU usage. Default. | ||
206 | force Force IOMMU. | ||
207 | merge Do lazy merging. This may improve performance on some block devices. | ||
208 | Implies force (experimental) | ||
209 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
210 | but should be only done with very big IOMMUs. Implies merge,force. | ||
211 | nomerge Don't do SG merging. | ||
212 | forcesac For SAC mode for masks <40bits (experimental) | ||
213 | fullflush Flush IOMMU on each allocation (default) | ||
214 | nofullflush Don't use IOMMU fullflush | ||
215 | allowed overwrite iommu off workarounds for specific chipsets. | ||
216 | soft Use software bounce buffering (default for Intel machines) | ||
217 | noaperture Don't touch the aperture for AGP. | ||
218 | */ | ||
219 | __init int iommu_setup(char *p) | ||
220 | { | ||
221 | iommu_merge = 1; | ||
59 | 222 | ||
60 | EXPORT_SYMBOL(dma_unmap_sg); | 223 | while (*p) { |
224 | if (!strncmp(p,"off",3)) | ||
225 | no_iommu = 1; | ||
226 | /* gart_parse_options has more force support */ | ||
227 | if (!strncmp(p,"force",5)) | ||
228 | force_iommu = 1; | ||
229 | if (!strncmp(p,"noforce",7)) { | ||
230 | iommu_merge = 0; | ||
231 | force_iommu = 0; | ||
232 | } | ||
233 | |||
234 | if (!strncmp(p, "biomerge",8)) { | ||
235 | iommu_bio_merge = 4096; | ||
236 | iommu_merge = 1; | ||
237 | force_iommu = 1; | ||
238 | } | ||
239 | if (!strncmp(p, "panic",5)) | ||
240 | panic_on_overflow = 1; | ||
241 | if (!strncmp(p, "nopanic",7)) | ||
242 | panic_on_overflow = 0; | ||
243 | if (!strncmp(p, "merge",5)) { | ||
244 | iommu_merge = 1; | ||
245 | force_iommu = 1; | ||
246 | } | ||
247 | if (!strncmp(p, "nomerge",7)) | ||
248 | iommu_merge = 0; | ||
249 | if (!strncmp(p, "forcesac",8)) | ||
250 | iommu_sac_force = 1; | ||
251 | |||
252 | #ifdef CONFIG_SWIOTLB | ||
253 | if (!strncmp(p, "soft",4)) | ||
254 | swiotlb = 1; | ||
255 | #endif | ||
256 | |||
257 | #ifdef CONFIG_GART_IOMMU | ||
258 | gart_parse_options(p); | ||
259 | #endif | ||
260 | |||
261 | p += strcspn(p, ","); | ||
262 | if (*p == ',') | ||
263 | ++p; | ||
264 | } | ||
265 | return 1; | ||
266 | } | ||
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 116e00f1bb7b..c37fc7726ba6 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -30,8 +30,8 @@ | |||
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/cacheflush.h> | 31 | #include <asm/cacheflush.h> |
32 | #include <asm/kdebug.h> | 32 | #include <asm/kdebug.h> |
33 | 33 | #include <asm/swiotlb.h> | |
34 | dma_addr_t bad_dma_address; | 34 | #include <asm/dma.h> |
35 | 35 | ||
36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
37 | static unsigned long iommu_size; /* size of remapping area bytes */ | 37 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -39,18 +39,6 @@ static unsigned long iommu_pages; /* .. and in pages */ | |||
39 | 39 | ||
40 | u32 *iommu_gatt_base; /* Remapping table */ | 40 | u32 *iommu_gatt_base; /* Remapping table */ |
41 | 41 | ||
42 | int no_iommu; | ||
43 | static int no_agp; | ||
44 | #ifdef CONFIG_IOMMU_DEBUG | ||
45 | int panic_on_overflow = 1; | ||
46 | int force_iommu = 1; | ||
47 | #else | ||
48 | int panic_on_overflow = 0; | ||
49 | int force_iommu = 0; | ||
50 | #endif | ||
51 | int iommu_merge = 1; | ||
52 | int iommu_sac_force = 0; | ||
53 | |||
54 | /* If this is disabled the IOMMU will use an optimized flushing strategy | 42 | /* If this is disabled the IOMMU will use an optimized flushing strategy |
55 | of only flushing when an mapping is reused. With it true the GART is flushed | 43 | of only flushing when an mapping is reused. With it true the GART is flushed |
56 | for every mapping. Problem is that doing the lazy flush seems to trigger | 44 | for every mapping. Problem is that doing the lazy flush seems to trigger |
@@ -58,10 +46,6 @@ int iommu_sac_force = 0; | |||
58 | also seen with Qlogic at least). */ | 46 | also seen with Qlogic at least). */ |
59 | int iommu_fullflush = 1; | 47 | int iommu_fullflush = 1; |
60 | 48 | ||
61 | /* This tells the BIO block layer to assume merging. Default to off | ||
62 | because we cannot guarantee merging later. */ | ||
63 | int iommu_bio_merge = 0; | ||
64 | |||
65 | #define MAX_NB 8 | 49 | #define MAX_NB 8 |
66 | 50 | ||
67 | /* Allocation bitmap for the remapping area */ | 51 | /* Allocation bitmap for the remapping area */ |
@@ -102,16 +86,6 @@ AGPEXTERN __u32 *agp_gatt_table; | |||
102 | 86 | ||
103 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 87 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
104 | static int need_flush; /* global flush state. set for each gart wrap */ | 88 | static int need_flush; /* global flush state. set for each gart wrap */ |
105 | static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | ||
106 | size_t size, int dir, int do_panic); | ||
107 | |||
108 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
109 | be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ | ||
110 | static struct device fallback_dev = { | ||
111 | .bus_id = "fallback device", | ||
112 | .coherent_dma_mask = 0xffffffff, | ||
113 | .dma_mask = &fallback_dev.coherent_dma_mask, | ||
114 | }; | ||
115 | 89 | ||
116 | static unsigned long alloc_iommu(int size) | 90 | static unsigned long alloc_iommu(int size) |
117 | { | 91 | { |
@@ -185,114 +159,7 @@ static void flush_gart(struct device *dev) | |||
185 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 159 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
186 | } | 160 | } |
187 | 161 | ||
188 | /* Allocate DMA memory on node near device */ | ||
189 | noinline | ||
190 | static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
191 | { | ||
192 | struct page *page; | ||
193 | int node; | ||
194 | if (dev->bus == &pci_bus_type) | ||
195 | node = pcibus_to_node(to_pci_dev(dev)->bus); | ||
196 | else | ||
197 | node = numa_node_id(); | ||
198 | page = alloc_pages_node(node, gfp, order); | ||
199 | return page ? page_address(page) : NULL; | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * Allocate memory for a coherent mapping. | ||
204 | */ | ||
205 | void * | ||
206 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
207 | gfp_t gfp) | ||
208 | { | ||
209 | void *memory; | ||
210 | unsigned long dma_mask = 0; | ||
211 | u64 bus; | ||
212 | 162 | ||
213 | if (!dev) | ||
214 | dev = &fallback_dev; | ||
215 | dma_mask = dev->coherent_dma_mask; | ||
216 | if (dma_mask == 0) | ||
217 | dma_mask = 0xffffffff; | ||
218 | |||
219 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
220 | uses the normal dma_mask for alloc_coherent. */ | ||
221 | dma_mask &= *dev->dma_mask; | ||
222 | |||
223 | /* Why <=? Even when the mask is smaller than 4GB it is often larger | ||
224 | than 16MB and in this case we have a chance of finding fitting memory | ||
225 | in the next higher zone first. If not retry with true GFP_DMA. -AK */ | ||
226 | if (dma_mask <= 0xffffffff) | ||
227 | gfp |= GFP_DMA32; | ||
228 | |||
229 | again: | ||
230 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
231 | if (memory == NULL) | ||
232 | return NULL; | ||
233 | |||
234 | { | ||
235 | int high, mmu; | ||
236 | bus = virt_to_bus(memory); | ||
237 | high = (bus + size) >= dma_mask; | ||
238 | mmu = high; | ||
239 | if (force_iommu && !(gfp & GFP_DMA)) | ||
240 | mmu = 1; | ||
241 | if (no_iommu || dma_mask < 0xffffffffUL) { | ||
242 | if (high) { | ||
243 | free_pages((unsigned long)memory, | ||
244 | get_order(size)); | ||
245 | |||
246 | if (swiotlb) { | ||
247 | return | ||
248 | swiotlb_alloc_coherent(dev, size, | ||
249 | dma_handle, | ||
250 | gfp); | ||
251 | } | ||
252 | |||
253 | if (!(gfp & GFP_DMA)) { | ||
254 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
255 | goto again; | ||
256 | } | ||
257 | return NULL; | ||
258 | } | ||
259 | mmu = 0; | ||
260 | } | ||
261 | memset(memory, 0, size); | ||
262 | if (!mmu) { | ||
263 | *dma_handle = virt_to_bus(memory); | ||
264 | return memory; | ||
265 | } | ||
266 | } | ||
267 | |||
268 | *dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0); | ||
269 | if (*dma_handle == bad_dma_address) | ||
270 | goto error; | ||
271 | flush_gart(dev); | ||
272 | return memory; | ||
273 | |||
274 | error: | ||
275 | if (panic_on_overflow) | ||
276 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size); | ||
277 | free_pages((unsigned long)memory, get_order(size)); | ||
278 | return NULL; | ||
279 | } | ||
280 | |||
281 | /* | ||
282 | * Unmap coherent memory. | ||
283 | * The caller must ensure that the device has finished accessing the mapping. | ||
284 | */ | ||
285 | void dma_free_coherent(struct device *dev, size_t size, | ||
286 | void *vaddr, dma_addr_t bus) | ||
287 | { | ||
288 | if (swiotlb) { | ||
289 | swiotlb_free_coherent(dev, size, vaddr, bus); | ||
290 | return; | ||
291 | } | ||
292 | |||
293 | dma_unmap_single(dev, bus, size, 0); | ||
294 | free_pages((unsigned long)vaddr, get_order(size)); | ||
295 | } | ||
296 | 163 | ||
297 | #ifdef CONFIG_IOMMU_LEAK | 164 | #ifdef CONFIG_IOMMU_LEAK |
298 | 165 | ||
@@ -326,7 +193,7 @@ void dump_leak(void) | |||
326 | #define CLEAR_LEAK(x) | 193 | #define CLEAR_LEAK(x) |
327 | #endif | 194 | #endif |
328 | 195 | ||
329 | static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) | 196 | static void iommu_full(struct device *dev, size_t size, int dir) |
330 | { | 197 | { |
331 | /* | 198 | /* |
332 | * Ran out of IOMMU space for this operation. This is very bad. | 199 | * Ran out of IOMMU space for this operation. This is very bad. |
@@ -342,11 +209,11 @@ static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) | |||
342 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | 209 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", |
343 | size, dev->bus_id); | 210 | size, dev->bus_id); |
344 | 211 | ||
345 | if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) { | 212 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
346 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 213 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
347 | panic("PCI-DMA: Memory would be corrupted\n"); | 214 | panic("PCI-DMA: Memory would be corrupted\n"); |
348 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 215 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
349 | panic("PCI-DMA: Random memory would be DMAed\n"); | 216 | panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); |
350 | } | 217 | } |
351 | 218 | ||
352 | #ifdef CONFIG_IOMMU_LEAK | 219 | #ifdef CONFIG_IOMMU_LEAK |
@@ -385,8 +252,8 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t | |||
385 | /* Map a single continuous physical area into the IOMMU. | 252 | /* Map a single continuous physical area into the IOMMU. |
386 | * Caller needs to check if the iommu is needed and flush. | 253 | * Caller needs to check if the iommu is needed and flush. |
387 | */ | 254 | */ |
388 | static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | 255 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
389 | size_t size, int dir, int do_panic) | 256 | size_t size, int dir) |
390 | { | 257 | { |
391 | unsigned long npages = to_pages(phys_mem, size); | 258 | unsigned long npages = to_pages(phys_mem, size); |
392 | unsigned long iommu_page = alloc_iommu(npages); | 259 | unsigned long iommu_page = alloc_iommu(npages); |
@@ -396,7 +263,7 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | |||
396 | return phys_mem; | 263 | return phys_mem; |
397 | if (panic_on_overflow) | 264 | if (panic_on_overflow) |
398 | panic("dma_map_area overflow %lu bytes\n", size); | 265 | panic("dma_map_area overflow %lu bytes\n", size); |
399 | iommu_full(dev, size, dir, do_panic); | 266 | iommu_full(dev, size, dir); |
400 | return bad_dma_address; | 267 | return bad_dma_address; |
401 | } | 268 | } |
402 | 269 | ||
@@ -408,15 +275,21 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | |||
408 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 275 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
409 | } | 276 | } |
410 | 277 | ||
278 | static dma_addr_t gart_map_simple(struct device *dev, char *buf, | ||
279 | size_t size, int dir) | ||
280 | { | ||
281 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | ||
282 | flush_gart(dev); | ||
283 | return map; | ||
284 | } | ||
285 | |||
411 | /* Map a single area into the IOMMU */ | 286 | /* Map a single area into the IOMMU */ |
412 | dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) | 287 | dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) |
413 | { | 288 | { |
414 | unsigned long phys_mem, bus; | 289 | unsigned long phys_mem, bus; |
415 | 290 | ||
416 | BUG_ON(dir == DMA_NONE); | 291 | BUG_ON(dir == DMA_NONE); |
417 | 292 | ||
418 | if (swiotlb) | ||
419 | return swiotlb_map_single(dev,addr,size,dir); | ||
420 | if (!dev) | 293 | if (!dev) |
421 | dev = &fallback_dev; | 294 | dev = &fallback_dev; |
422 | 295 | ||
@@ -424,10 +297,24 @@ dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) | |||
424 | if (!need_iommu(dev, phys_mem, size)) | 297 | if (!need_iommu(dev, phys_mem, size)) |
425 | return phys_mem; | 298 | return phys_mem; |
426 | 299 | ||
427 | bus = dma_map_area(dev, phys_mem, size, dir, 1); | 300 | bus = gart_map_simple(dev, addr, size, dir); |
428 | flush_gart(dev); | ||
429 | return bus; | 301 | return bus; |
430 | } | 302 | } |
303 | |||
304 | /* | ||
305 | * Wrapper for pci_unmap_single working with scatterlists. | ||
306 | */ | ||
307 | void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
308 | { | ||
309 | int i; | ||
310 | |||
311 | for (i = 0; i < nents; i++) { | ||
312 | struct scatterlist *s = &sg[i]; | ||
313 | if (!s->dma_length || !s->length) | ||
314 | break; | ||
315 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
316 | } | ||
317 | } | ||
431 | 318 | ||
432 | /* Fallback for dma_map_sg in case of overflow */ | 319 | /* Fallback for dma_map_sg in case of overflow */ |
433 | static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | 320 | static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, |
@@ -443,10 +330,10 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
443 | struct scatterlist *s = &sg[i]; | 330 | struct scatterlist *s = &sg[i]; |
444 | unsigned long addr = page_to_phys(s->page) + s->offset; | 331 | unsigned long addr = page_to_phys(s->page) + s->offset; |
445 | if (nonforced_iommu(dev, addr, s->length)) { | 332 | if (nonforced_iommu(dev, addr, s->length)) { |
446 | addr = dma_map_area(dev, addr, s->length, dir, 0); | 333 | addr = dma_map_area(dev, addr, s->length, dir); |
447 | if (addr == bad_dma_address) { | 334 | if (addr == bad_dma_address) { |
448 | if (i > 0) | 335 | if (i > 0) |
449 | dma_unmap_sg(dev, sg, i, dir); | 336 | gart_unmap_sg(dev, sg, i, dir); |
450 | nents = 0; | 337 | nents = 0; |
451 | sg[0].dma_length = 0; | 338 | sg[0].dma_length = 0; |
452 | break; | 339 | break; |
@@ -515,7 +402,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, | |||
515 | * DMA map all entries in a scatterlist. | 402 | * DMA map all entries in a scatterlist. |
516 | * Merge chunks that have page aligned sizes into a continuous mapping. | 403 | * Merge chunks that have page aligned sizes into a continuous mapping. |
517 | */ | 404 | */ |
518 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | 405 | int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) |
519 | { | 406 | { |
520 | int i; | 407 | int i; |
521 | int out; | 408 | int out; |
@@ -527,8 +414,6 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
527 | if (nents == 0) | 414 | if (nents == 0) |
528 | return 0; | 415 | return 0; |
529 | 416 | ||
530 | if (swiotlb) | ||
531 | return swiotlb_map_sg(dev,sg,nents,dir); | ||
532 | if (!dev) | 417 | if (!dev) |
533 | dev = &fallback_dev; | 418 | dev = &fallback_dev; |
534 | 419 | ||
@@ -571,13 +456,13 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
571 | 456 | ||
572 | error: | 457 | error: |
573 | flush_gart(NULL); | 458 | flush_gart(NULL); |
574 | dma_unmap_sg(dev, sg, nents, dir); | 459 | gart_unmap_sg(dev, sg, nents, dir); |
575 | /* When it was forced try again unforced */ | 460 | /* When it was forced try again unforced */ |
576 | if (force_iommu) | 461 | if (force_iommu) |
577 | return dma_map_sg_nonforce(dev, sg, nents, dir); | 462 | return dma_map_sg_nonforce(dev, sg, nents, dir); |
578 | if (panic_on_overflow) | 463 | if (panic_on_overflow) |
579 | panic("dma_map_sg: overflow on %lu pages\n", pages); | 464 | panic("dma_map_sg: overflow on %lu pages\n", pages); |
580 | iommu_full(dev, pages << PAGE_SHIFT, dir, 0); | 465 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
581 | for (i = 0; i < nents; i++) | 466 | for (i = 0; i < nents; i++) |
582 | sg[i].dma_address = bad_dma_address; | 467 | sg[i].dma_address = bad_dma_address; |
583 | return 0; | 468 | return 0; |
@@ -586,18 +471,13 @@ error: | |||
586 | /* | 471 | /* |
587 | * Free a DMA mapping. | 472 | * Free a DMA mapping. |
588 | */ | 473 | */ |
589 | void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, | 474 | void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, |
590 | size_t size, int direction) | 475 | size_t size, int direction) |
591 | { | 476 | { |
592 | unsigned long iommu_page; | 477 | unsigned long iommu_page; |
593 | int npages; | 478 | int npages; |
594 | int i; | 479 | int i; |
595 | 480 | ||
596 | if (swiotlb) { | ||
597 | swiotlb_unmap_single(dev,dma_addr,size,direction); | ||
598 | return; | ||
599 | } | ||
600 | |||
601 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | 481 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || |
602 | dma_addr >= iommu_bus_base + iommu_size) | 482 | dma_addr >= iommu_bus_base + iommu_size) |
603 | return; | 483 | return; |
@@ -610,68 +490,7 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
610 | free_iommu(iommu_page, npages); | 490 | free_iommu(iommu_page, npages); |
611 | } | 491 | } |
612 | 492 | ||
613 | /* | 493 | static int no_agp; |
614 | * Wrapper for pci_unmap_single working with scatterlists. | ||
615 | */ | ||
616 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
617 | { | ||
618 | int i; | ||
619 | if (swiotlb) { | ||
620 | swiotlb_unmap_sg(dev,sg,nents,dir); | ||
621 | return; | ||
622 | } | ||
623 | for (i = 0; i < nents; i++) { | ||
624 | struct scatterlist *s = &sg[i]; | ||
625 | if (!s->dma_length || !s->length) | ||
626 | break; | ||
627 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
628 | } | ||
629 | } | ||
630 | |||
631 | int dma_supported(struct device *dev, u64 mask) | ||
632 | { | ||
633 | /* Copied from i386. Doesn't make much sense, because it will | ||
634 | only work for pci_alloc_coherent. | ||
635 | The caller just has to use GFP_DMA in this case. */ | ||
636 | if (mask < 0x00ffffff) | ||
637 | return 0; | ||
638 | |||
639 | /* Tell the device to use SAC when IOMMU force is on. | ||
640 | This allows the driver to use cheaper accesses in some cases. | ||
641 | |||
642 | Problem with this is that if we overflow the IOMMU area | ||
643 | and return DAC as fallback address the device may not handle it correctly. | ||
644 | |||
645 | As a special case some controllers have a 39bit address mode | ||
646 | that is as efficient as 32bit (aic79xx). Don't force SAC for these. | ||
647 | Assume all masks <= 40 bits are of this type. Normally this doesn't | ||
648 | make any difference, but gives more gentle handling of IOMMU overflow. */ | ||
649 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
650 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | return 1; | ||
655 | } | ||
656 | |||
657 | int dma_get_cache_alignment(void) | ||
658 | { | ||
659 | return boot_cpu_data.x86_clflush_size; | ||
660 | } | ||
661 | |||
662 | EXPORT_SYMBOL(dma_unmap_sg); | ||
663 | EXPORT_SYMBOL(dma_map_sg); | ||
664 | EXPORT_SYMBOL(dma_map_single); | ||
665 | EXPORT_SYMBOL(dma_unmap_single); | ||
666 | EXPORT_SYMBOL(dma_supported); | ||
667 | EXPORT_SYMBOL(no_iommu); | ||
668 | EXPORT_SYMBOL(force_iommu); | ||
669 | EXPORT_SYMBOL(bad_dma_address); | ||
670 | EXPORT_SYMBOL(iommu_bio_merge); | ||
671 | EXPORT_SYMBOL(iommu_sac_force); | ||
672 | EXPORT_SYMBOL(dma_get_cache_alignment); | ||
673 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
674 | EXPORT_SYMBOL(dma_free_coherent); | ||
675 | 494 | ||
676 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 495 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
677 | { | 496 | { |
@@ -778,6 +597,21 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
778 | 597 | ||
779 | extern int agp_amd64_init(void); | 598 | extern int agp_amd64_init(void); |
780 | 599 | ||
600 | static struct dma_mapping_ops gart_dma_ops = { | ||
601 | .mapping_error = NULL, | ||
602 | .map_single = gart_map_single, | ||
603 | .map_simple = gart_map_simple, | ||
604 | .unmap_single = gart_unmap_single, | ||
605 | .sync_single_for_cpu = NULL, | ||
606 | .sync_single_for_device = NULL, | ||
607 | .sync_single_range_for_cpu = NULL, | ||
608 | .sync_single_range_for_device = NULL, | ||
609 | .sync_sg_for_cpu = NULL, | ||
610 | .sync_sg_for_device = NULL, | ||
611 | .map_sg = gart_map_sg, | ||
612 | .unmap_sg = gart_unmap_sg, | ||
613 | }; | ||
614 | |||
781 | static int __init pci_iommu_init(void) | 615 | static int __init pci_iommu_init(void) |
782 | { | 616 | { |
783 | struct agp_kern_info info; | 617 | struct agp_kern_info info; |
@@ -799,16 +633,15 @@ static int __init pci_iommu_init(void) | |||
799 | 633 | ||
800 | if (swiotlb) { | 634 | if (swiotlb) { |
801 | no_iommu = 1; | 635 | no_iommu = 1; |
802 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | ||
803 | return -1; | 636 | return -1; |
804 | } | 637 | } |
805 | 638 | ||
806 | if (no_iommu || | 639 | if (no_iommu || |
807 | (!force_iommu && (end_pfn-1) < 0xffffffff>>PAGE_SHIFT) || | 640 | (!force_iommu && end_pfn <= MAX_DMA32_PFN) || |
808 | !iommu_aperture || | 641 | !iommu_aperture || |
809 | (no_agp && init_k8_gatt(&info) < 0)) { | 642 | (no_agp && init_k8_gatt(&info) < 0)) { |
810 | printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); | ||
811 | no_iommu = 1; | 643 | no_iommu = 1; |
644 | no_iommu_init(); | ||
812 | return -1; | 645 | return -1; |
813 | } | 646 | } |
814 | 647 | ||
@@ -885,100 +718,50 @@ static int __init pci_iommu_init(void) | |||
885 | 718 | ||
886 | flush_gart(NULL); | 719 | flush_gart(NULL); |
887 | 720 | ||
721 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | ||
722 | dma_ops = &gart_dma_ops; | ||
723 | |||
888 | return 0; | 724 | return 0; |
889 | } | 725 | } |
890 | 726 | ||
891 | /* Must execute after PCI subsystem */ | 727 | /* Must execute after PCI subsystem */ |
892 | fs_initcall(pci_iommu_init); | 728 | fs_initcall(pci_iommu_init); |
893 | 729 | ||
894 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | 730 | void gart_parse_options(char *p) |
895 | [,forcesac][,fullflush][,nomerge][,biomerge] | 731 | { |
896 | size set size of iommu (in bytes) | 732 | int arg; |
897 | noagp don't initialize the AGP driver and use full aperture. | 733 | |
898 | off don't use the IOMMU | ||
899 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
900 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
901 | noforce don't force IOMMU usage. Default. | ||
902 | force Force IOMMU. | ||
903 | merge Do lazy merging. This may improve performance on some block devices. | ||
904 | Implies force (experimental) | ||
905 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
906 | but should be only done with very big IOMMUs. Implies merge,force. | ||
907 | nomerge Don't do SG merging. | ||
908 | forcesac For SAC mode for masks <40bits (experimental) | ||
909 | fullflush Flush IOMMU on each allocation (default) | ||
910 | nofullflush Don't use IOMMU fullflush | ||
911 | allowed overwrite iommu off workarounds for specific chipsets. | ||
912 | soft Use software bounce buffering (default for Intel machines) | ||
913 | noaperture Don't touch the aperture for AGP. | ||
914 | */ | ||
915 | __init int iommu_setup(char *p) | ||
916 | { | ||
917 | int arg; | ||
918 | |||
919 | while (*p) { | ||
920 | if (!strncmp(p,"noagp",5)) | ||
921 | no_agp = 1; | ||
922 | if (!strncmp(p,"off",3)) | ||
923 | no_iommu = 1; | ||
924 | if (!strncmp(p,"force",5)) { | ||
925 | force_iommu = 1; | ||
926 | iommu_aperture_allowed = 1; | ||
927 | } | ||
928 | if (!strncmp(p,"allowed",7)) | ||
929 | iommu_aperture_allowed = 1; | ||
930 | if (!strncmp(p,"noforce",7)) { | ||
931 | iommu_merge = 0; | ||
932 | force_iommu = 0; | ||
933 | } | ||
934 | if (!strncmp(p, "memaper", 7)) { | ||
935 | fallback_aper_force = 1; | ||
936 | p += 7; | ||
937 | if (*p == '=') { | ||
938 | ++p; | ||
939 | if (get_option(&p, &arg)) | ||
940 | fallback_aper_order = arg; | ||
941 | } | ||
942 | } | ||
943 | if (!strncmp(p, "biomerge",8)) { | ||
944 | iommu_bio_merge = 4096; | ||
945 | iommu_merge = 1; | ||
946 | force_iommu = 1; | ||
947 | } | ||
948 | if (!strncmp(p, "panic",5)) | ||
949 | panic_on_overflow = 1; | ||
950 | if (!strncmp(p, "nopanic",7)) | ||
951 | panic_on_overflow = 0; | ||
952 | if (!strncmp(p, "merge",5)) { | ||
953 | iommu_merge = 1; | ||
954 | force_iommu = 1; | ||
955 | } | ||
956 | if (!strncmp(p, "nomerge",7)) | ||
957 | iommu_merge = 0; | ||
958 | if (!strncmp(p, "forcesac",8)) | ||
959 | iommu_sac_force = 1; | ||
960 | if (!strncmp(p, "fullflush",8)) | ||
961 | iommu_fullflush = 1; | ||
962 | if (!strncmp(p, "nofullflush",11)) | ||
963 | iommu_fullflush = 0; | ||
964 | if (!strncmp(p, "soft",4)) | ||
965 | swiotlb = 1; | ||
966 | if (!strncmp(p, "noaperture",10)) | ||
967 | fix_aperture = 0; | ||
968 | #ifdef CONFIG_IOMMU_LEAK | 734 | #ifdef CONFIG_IOMMU_LEAK |
969 | if (!strncmp(p,"leak",4)) { | 735 | if (!strncmp(p,"leak",4)) { |
970 | leak_trace = 1; | 736 | leak_trace = 1; |
971 | p += 4; | 737 | p += 4; |
972 | if (*p == '=') ++p; | 738 | if (*p == '=') ++p; |
973 | if (isdigit(*p) && get_option(&p, &arg)) | 739 | if (isdigit(*p) && get_option(&p, &arg)) |
974 | iommu_leak_pages = arg; | 740 | iommu_leak_pages = arg; |
975 | } else | 741 | } |
976 | #endif | 742 | #endif |
977 | if (isdigit(*p) && get_option(&p, &arg)) | 743 | if (isdigit(*p) && get_option(&p, &arg)) |
978 | iommu_size = arg; | 744 | iommu_size = arg; |
979 | p += strcspn(p, ","); | 745 | if (!strncmp(p, "fullflush",8)) |
980 | if (*p == ',') | 746 | iommu_fullflush = 1; |
981 | ++p; | 747 | if (!strncmp(p, "nofullflush",11)) |
982 | } | 748 | iommu_fullflush = 0; |
983 | return 1; | 749 | if (!strncmp(p,"noagp",5)) |
984 | } | 750 | no_agp = 1; |
751 | if (!strncmp(p, "noaperture",10)) | ||
752 | fix_aperture = 0; | ||
753 | /* duplicated from pci-dma.c */ | ||
754 | if (!strncmp(p,"force",5)) | ||
755 | iommu_aperture_allowed = 1; | ||
756 | if (!strncmp(p,"allowed",7)) | ||
757 | iommu_aperture_allowed = 1; | ||
758 | if (!strncmp(p, "memaper", 7)) { | ||
759 | fallback_aper_force = 1; | ||
760 | p += 7; | ||
761 | if (*p == '=') { | ||
762 | ++p; | ||
763 | if (get_option(&p, &arg)) | ||
764 | fallback_aper_order = arg; | ||
765 | } | ||
766 | } | ||
767 | } | ||
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 5a981dca87ff..3209adac3272 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -6,89 +6,93 @@ | |||
6 | #include <linux/string.h> | 6 | #include <linux/string.h> |
7 | #include <asm/proto.h> | 7 | #include <asm/proto.h> |
8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
9 | #include <asm/dma.h> | ||
9 | 10 | ||
10 | int iommu_merge = 0; | 11 | static int |
11 | EXPORT_SYMBOL(iommu_merge); | 12 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
12 | |||
13 | dma_addr_t bad_dma_address; | ||
14 | EXPORT_SYMBOL(bad_dma_address); | ||
15 | |||
16 | int iommu_bio_merge = 0; | ||
17 | EXPORT_SYMBOL(iommu_bio_merge); | ||
18 | |||
19 | int iommu_sac_force = 0; | ||
20 | EXPORT_SYMBOL(iommu_sac_force); | ||
21 | |||
22 | /* | ||
23 | * Dummy IO MMU functions | ||
24 | */ | ||
25 | |||
26 | void *dma_alloc_coherent(struct device *hwdev, size_t size, | ||
27 | dma_addr_t *dma_handle, gfp_t gfp) | ||
28 | { | 13 | { |
29 | void *ret; | 14 | if (hwdev && bus + size > *hwdev->dma_mask) { |
30 | u64 mask; | 15 | printk(KERN_ERR |
31 | int order = get_order(size); | 16 | "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", |
32 | 17 | name, (long long)bus, size, (long long)*hwdev->dma_mask); | |
33 | if (hwdev) | 18 | return 0; |
34 | mask = hwdev->coherent_dma_mask & *hwdev->dma_mask; | ||
35 | else | ||
36 | mask = 0xffffffff; | ||
37 | for (;;) { | ||
38 | ret = (void *)__get_free_pages(gfp, order); | ||
39 | if (ret == NULL) | ||
40 | return NULL; | ||
41 | *dma_handle = virt_to_bus(ret); | ||
42 | if ((*dma_handle & ~mask) == 0) | ||
43 | break; | ||
44 | free_pages((unsigned long)ret, order); | ||
45 | if (gfp & GFP_DMA) | ||
46 | return NULL; | ||
47 | gfp |= GFP_DMA; | ||
48 | } | 19 | } |
20 | return 1; | ||
21 | } | ||
49 | 22 | ||
50 | memset(ret, 0, size); | 23 | static dma_addr_t |
51 | return ret; | 24 | nommu_map_single(struct device *hwdev, void *ptr, size_t size, |
25 | int direction) | ||
26 | { | ||
27 | dma_addr_t bus = virt_to_bus(ptr); | ||
28 | if (!check_addr("map_single", hwdev, bus, size)) | ||
29 | return bad_dma_address; | ||
30 | return bus; | ||
52 | } | 31 | } |
53 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
54 | 32 | ||
55 | void dma_free_coherent(struct device *hwdev, size_t size, | 33 | void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, |
56 | void *vaddr, dma_addr_t dma_handle) | 34 | int direction) |
57 | { | 35 | { |
58 | free_pages((unsigned long)vaddr, get_order(size)); | ||
59 | } | 36 | } |
60 | EXPORT_SYMBOL(dma_free_coherent); | ||
61 | 37 | ||
62 | int dma_supported(struct device *hwdev, u64 mask) | 38 | /* Map a set of buffers described by scatterlist in streaming |
39 | * mode for DMA. This is the scatter-gather version of the | ||
40 | * above pci_map_single interface. Here the scatter gather list | ||
41 | * elements are each tagged with the appropriate dma address | ||
42 | * and length. They are obtained via sg_dma_{address,length}(SG). | ||
43 | * | ||
44 | * NOTE: An implementation may be able to use a smaller number of | ||
45 | * DMA address/length pairs than there are SG table elements. | ||
46 | * (for example via virtual mapping capabilities) | ||
47 | * The routine returns the number of addr/length pairs actually | ||
48 | * used, at most nents. | ||
49 | * | ||
50 | * Device ownership issues as mentioned above for pci_map_single are | ||
51 | * the same here. | ||
52 | */ | ||
53 | int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
54 | int nents, int direction) | ||
63 | { | 55 | { |
64 | /* | 56 | int i; |
65 | * we fall back to GFP_DMA when the mask isn't all 1s, | ||
66 | * so we can't guarantee allocations that must be | ||
67 | * within a tighter range than GFP_DMA.. | ||
68 | * RED-PEN this won't work for pci_map_single. Caller has to | ||
69 | * use GFP_DMA in the first place. | ||
70 | */ | ||
71 | if (mask < 0x00ffffff) | ||
72 | return 0; | ||
73 | 57 | ||
74 | return 1; | 58 | BUG_ON(direction == DMA_NONE); |
75 | } | 59 | for (i = 0; i < nents; i++ ) { |
76 | EXPORT_SYMBOL(dma_supported); | 60 | struct scatterlist *s = &sg[i]; |
61 | BUG_ON(!s->page); | ||
62 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | ||
63 | if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) | ||
64 | return 0; | ||
65 | s->dma_length = s->length; | ||
66 | } | ||
67 | return nents; | ||
68 | } | ||
77 | 69 | ||
78 | int dma_get_cache_alignment(void) | 70 | /* Unmap a set of streaming mode DMA translations. |
71 | * Again, cpu read rules concerning calls here are the same as for | ||
72 | * pci_unmap_single() above. | ||
73 | */ | ||
74 | void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, | ||
75 | int nents, int dir) | ||
79 | { | 76 | { |
80 | return boot_cpu_data.x86_clflush_size; | ||
81 | } | 77 | } |
82 | EXPORT_SYMBOL(dma_get_cache_alignment); | ||
83 | 78 | ||
84 | static int __init check_ram(void) | 79 | struct dma_mapping_ops nommu_dma_ops = { |
85 | { | 80 | .map_single = nommu_map_single, |
86 | if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { | 81 | .unmap_single = nommu_unmap_single, |
87 | printk( | 82 | .map_sg = nommu_map_sg, |
88 | KERN_ERR "WARNING more than 4GB of memory but IOMMU not compiled in.\n" | 83 | .unmap_sg = nommu_unmap_sg, |
89 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | 84 | .is_phys = 1, |
90 | } | 85 | }; |
91 | return 0; | ||
92 | } | ||
93 | __initcall(check_ram); | ||
94 | 86 | ||
87 | void __init no_iommu_init(void) | ||
88 | { | ||
89 | if (dma_ops) | ||
90 | return; | ||
91 | printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); | ||
92 | dma_ops = &nommu_dma_ops; | ||
93 | if (end_pfn > MAX_DMA32_PFN) { | ||
94 | printk(KERN_ERR | ||
95 | "WARNING more than 4GB of memory but IOMMU disabled.\n" | ||
96 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | ||
97 | } | ||
98 | } | ||
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
new file mode 100644
index 000000000000..3569a25ad7fb
--- /dev/null
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -0,0 +1,42 @@ | |||
1 | /* Glue code to lib/swiotlb.c */ | ||
2 | |||
3 | #include <linux/pci.h> | ||
4 | #include <linux/cache.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <asm/dma-mapping.h> | ||
7 | #include <asm/proto.h> | ||
8 | #include <asm/swiotlb.h> | ||
9 | #include <asm/dma.h> | ||
10 | |||
11 | int swiotlb __read_mostly; | ||
12 | EXPORT_SYMBOL(swiotlb); | ||
13 | |||
14 | struct dma_mapping_ops swiotlb_dma_ops = { | ||
15 | .mapping_error = swiotlb_dma_mapping_error, | ||
16 | .alloc_coherent = swiotlb_alloc_coherent, | ||
17 | .free_coherent = swiotlb_free_coherent, | ||
18 | .map_single = swiotlb_map_single, | ||
19 | .unmap_single = swiotlb_unmap_single, | ||
20 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, | ||
21 | .sync_single_for_device = swiotlb_sync_single_for_device, | ||
22 | .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, | ||
23 | .sync_single_range_for_device = swiotlb_sync_single_range_for_device, | ||
24 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, | ||
25 | .sync_sg_for_device = swiotlb_sync_sg_for_device, | ||
26 | .map_sg = swiotlb_map_sg, | ||
27 | .unmap_sg = swiotlb_unmap_sg, | ||
28 | .dma_supported = NULL, | ||
29 | }; | ||
30 | |||
31 | void pci_swiotlb_init(void) | ||
32 | { | ||
33 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | ||
34 | if (!iommu_aperture && !no_iommu && | ||
35 | (end_pfn > MAX_DMA32_PFN || force_iommu)) | ||
36 | swiotlb = 1; | ||
37 | if (swiotlb) { | ||
38 | swiotlb_init(); | ||
39 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | ||
40 | dma_ops = &swiotlb_dma_ops; | ||
41 | } | ||
42 | } | ||
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 6810690d6045..e7a4d2cd3968 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/kexec.h> | 45 | #include <linux/kexec.h> |
46 | #include <linux/cpufreq.h> | 46 | #include <linux/cpufreq.h> |
47 | #include <linux/dmi.h> | 47 | #include <linux/dmi.h> |
48 | #include <linux/dma-mapping.h> | ||
48 | 49 | ||
49 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
50 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
@@ -63,7 +64,9 @@ | |||
63 | #include <asm/setup.h> | 64 | #include <asm/setup.h> |
64 | #include <asm/mach_apic.h> | 65 | #include <asm/mach_apic.h> |
65 | #include <asm/numa.h> | 66 | #include <asm/numa.h> |
67 | #include <asm/swiotlb.h> | ||
66 | #include <asm/sections.h> | 68 | #include <asm/sections.h> |
69 | #include <asm/gart-mapping.h> | ||
67 | 70 | ||
68 | /* | 71 | /* |
69 | * Machine setup.. | 72 | * Machine setup.. |
@@ -88,11 +91,6 @@ int bootloader_type; | |||
88 | 91 | ||
89 | unsigned long saved_video_mode; | 92 | unsigned long saved_video_mode; |
90 | 93 | ||
91 | #ifdef CONFIG_SWIOTLB | ||
92 | int swiotlb; | ||
93 | EXPORT_SYMBOL(swiotlb); | ||
94 | #endif | ||
95 | |||
96 | /* | 94 | /* |
97 | * Setup options | 95 | * Setup options |
98 | */ | 96 | */ |
@@ -389,11 +387,9 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
389 | numa_setup(from+5); | 387 | numa_setup(from+5); |
390 | #endif | 388 | #endif |
391 | 389 | ||
392 | #ifdef CONFIG_GART_IOMMU | ||
393 | if (!memcmp(from,"iommu=",6)) { | 390 | if (!memcmp(from,"iommu=",6)) { |
394 | iommu_setup(from+6); | 391 | iommu_setup(from+6); |
395 | } | 392 | } |
396 | #endif | ||
397 | 393 | ||
398 | if (!memcmp(from,"oops=panic", 10)) | 394 | if (!memcmp(from,"oops=panic", 10)) |
399 | panic_on_oops = 1; | 395 | panic_on_oops = 1; |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 5337ca0ea7b9..e93867850a4f 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/dma-mapping.h> | ||
26 | 27 | ||
27 | #include <asm/processor.h> | 28 | #include <asm/processor.h> |
28 | #include <asm/system.h> | 29 | #include <asm/system.h> |
@@ -38,11 +39,16 @@ | |||
38 | #include <asm/proto.h> | 39 | #include <asm/proto.h> |
39 | #include <asm/smp.h> | 40 | #include <asm/smp.h> |
40 | #include <asm/sections.h> | 41 | #include <asm/sections.h> |
42 | #include <asm/dma-mapping.h> | ||
43 | #include <asm/swiotlb.h> | ||
41 | 44 | ||
42 | #ifndef Dprintk | 45 | #ifndef Dprintk |
43 | #define Dprintk(x...) | 46 | #define Dprintk(x...) |
44 | #endif | 47 | #endif |
45 | 48 | ||
49 | struct dma_mapping_ops* dma_ops; | ||
50 | EXPORT_SYMBOL(dma_ops); | ||
51 | |||
46 | static unsigned long dma_reserve __initdata; | 52 | static unsigned long dma_reserve __initdata; |
47 | 53 | ||
48 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 54 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
@@ -423,12 +429,9 @@ void __init mem_init(void) | |||
423 | long codesize, reservedpages, datasize, initsize; | 429 | long codesize, reservedpages, datasize, initsize; |
424 | 430 | ||
425 | #ifdef CONFIG_SWIOTLB | 431 | #ifdef CONFIG_SWIOTLB |
426 | if (!iommu_aperture && | 432 | pci_swiotlb_init(); |
427 | ((end_pfn-1) >= 0xffffffff>>PAGE_SHIFT || force_iommu)) | ||
428 | swiotlb = 1; | ||
429 | if (swiotlb) | ||
430 | swiotlb_init(); | ||
431 | #endif | 433 | #endif |
434 | no_iommu_init(); | ||
432 | 435 | ||
433 | /* How many end-of-memory variables you have, grandma! */ | 436 | /* How many end-of-memory variables you have, grandma! */ |
434 | max_low_pfn = end_pfn; | 437 | max_low_pfn = end_pfn; |