diff options
Diffstat (limited to 'arch/x86_64/kernel/pci-gart.c')
-rw-r--r-- | arch/x86_64/kernel/pci-gart.c | 980 |
1 files changed, 980 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c new file mode 100644 index 000000000000..57f35c68aa34 --- /dev/null +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -0,0 +1,980 @@ | |||
1 | /* | ||
2 | * Dynamic DMA mapping support for AMD Hammer. | ||
3 | * | ||
4 | * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. | ||
5 | * This allows to use PCI devices that only support 32bit addresses on systems | ||
6 | * with more than 4GB. | ||
7 | * | ||
8 | * See Documentation/DMA-mapping.txt for the interface specification. | ||
9 | * | ||
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/ctype.h> | ||
16 | #include <linux/agp_backend.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/pci.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/topology.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/bitops.h> | ||
26 | #include <asm/atomic.h> | ||
27 | #include <asm/io.h> | ||
28 | #include <asm/mtrr.h> | ||
29 | #include <asm/pgtable.h> | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/cacheflush.h> | ||
32 | #include <asm/kdebug.h> | ||
33 | |||
/* Returned by the mapping functions when the IOMMU overflows; points at
   the reserved EMERGENCY_PAGES at the start of the remapping window. */
dma_addr_t bad_dma_address;

unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size; 	/* size of remapping area bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base; 		/* Remapping table */

int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow = 1;
int force_iommu = 1;
#else
int panic_on_overflow = 0;
int force_iommu = 0;
#endif
int iommu_merge = 1;
int iommu_sac_force = 0;

/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when an mapping is reused. With it true the GART is flushed
   for every mapping. Problem is that doing the lazy flush seems to trigger
   bugs with some popular PCI cards, in particular 3ware (but has been also
   also seen with Qlogic at least). */
int iommu_fullflush = 1;

/* This tells the BIO block layer to assume merging. Default to off
   because we cannot guarantee merging later. */
int iommu_bio_merge = 0;

/* One slot per possible northbridge (one per CPU node on K8). */
#define MAX_NB 8

/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

/* GART PTE value written for unmapped entries (points at a scratch page
   so stray prefetches don't cause bus aborts; set up in pci_iommu_init). */
static u32 gart_unmapped_entry;

/* GART PTE format: low 20 address bits in place, physical address bits
   >=32 folded into bits 4..11, plus valid/coherent flags. */
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

/* Number of GART pages needed to cover size bytes starting at addr. */
#define to_pages(addr,size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)

/* Iterate over all K8 northbridge devices (AMD device 0x1103, bus 0,
   slots 24..31 — one per node). */
#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
	     if (dev->bus->number == 0 && 				\
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
static int need_flush; 		/* global flush state. set for each gart wrap */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
			       size_t size, int dir, int do_panic);

/* Dummy device used for NULL arguments (normally ISA). Better would
   be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */
static struct device fallback_dev = {
	.bus_id = "fallback device",
	.coherent_dma_mask = 0xffffffff,
	.dma_mask = &fallback_dev.coherent_dma_mask,
};
115 | |||
116 | static unsigned long alloc_iommu(int size) | ||
117 | { | ||
118 | unsigned long offset, flags; | ||
119 | |||
120 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
121 | offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); | ||
122 | if (offset == -1) { | ||
123 | need_flush = 1; | ||
124 | offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); | ||
125 | } | ||
126 | if (offset != -1) { | ||
127 | set_bit_string(iommu_gart_bitmap, offset, size); | ||
128 | next_bit = offset+size; | ||
129 | if (next_bit >= iommu_pages) { | ||
130 | next_bit = 0; | ||
131 | need_flush = 1; | ||
132 | } | ||
133 | } | ||
134 | if (iommu_fullflush) | ||
135 | need_flush = 1; | ||
136 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
137 | return offset; | ||
138 | } | ||
139 | |||
140 | static void free_iommu(unsigned long offset, int size) | ||
141 | { | ||
142 | unsigned long flags; | ||
143 | if (size == 1) { | ||
144 | clear_bit(offset, iommu_gart_bitmap); | ||
145 | return; | ||
146 | } | ||
147 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
148 | __clear_bit_string(iommu_gart_bitmap, offset, size); | ||
149 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Use global flush state to avoid races with multiple flushers. | ||
154 | */ | ||
155 | static void flush_gart(struct device *dev) | ||
156 | { | ||
157 | unsigned long flags; | ||
158 | int flushed = 0; | ||
159 | int i, max; | ||
160 | |||
161 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
162 | if (need_flush) { | ||
163 | max = 0; | ||
164 | for (i = 0; i < MAX_NB; i++) { | ||
165 | if (!northbridges[i]) | ||
166 | continue; | ||
167 | pci_write_config_dword(northbridges[i], 0x9c, | ||
168 | northbridge_flush_word[i] | 1); | ||
169 | flushed++; | ||
170 | max = i; | ||
171 | } | ||
172 | for (i = 0; i <= max; i++) { | ||
173 | u32 w; | ||
174 | if (!northbridges[i]) | ||
175 | continue; | ||
176 | /* Make sure the hardware actually executed the flush. */ | ||
177 | do { | ||
178 | pci_read_config_dword(northbridges[i], 0x9c, &w); | ||
179 | } while (w & 1); | ||
180 | } | ||
181 | if (!flushed) | ||
182 | printk("nothing to flush?\n"); | ||
183 | need_flush = 0; | ||
184 | } | ||
185 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
186 | } | ||
187 | |||
188 | /* Allocate DMA memory on node near device */ | ||
189 | noinline | ||
190 | static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) | ||
191 | { | ||
192 | struct page *page; | ||
193 | int node; | ||
194 | if (dev->bus == &pci_bus_type) { | ||
195 | cpumask_t mask; | ||
196 | mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); | ||
197 | node = cpu_to_node(first_cpu(mask)); | ||
198 | } else | ||
199 | node = numa_node_id(); | ||
200 | page = alloc_pages_node(node, gfp, order); | ||
201 | return page ? page_address(page) : NULL; | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * Allocate memory for a coherent mapping. | ||
206 | */ | ||
207 | void * | ||
208 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
209 | unsigned gfp) | ||
210 | { | ||
211 | void *memory; | ||
212 | unsigned long dma_mask = 0; | ||
213 | u64 bus; | ||
214 | |||
215 | if (!dev) | ||
216 | dev = &fallback_dev; | ||
217 | dma_mask = dev->coherent_dma_mask; | ||
218 | if (dma_mask == 0) | ||
219 | dma_mask = 0xffffffff; | ||
220 | |||
221 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
222 | uses the normal dma_mask for alloc_coherent. */ | ||
223 | dma_mask &= *dev->dma_mask; | ||
224 | |||
225 | again: | ||
226 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
227 | if (memory == NULL) | ||
228 | return NULL; | ||
229 | |||
230 | { | ||
231 | int high, mmu; | ||
232 | bus = virt_to_bus(memory); | ||
233 | high = (bus + size) >= dma_mask; | ||
234 | mmu = high; | ||
235 | if (force_iommu && !(gfp & GFP_DMA)) | ||
236 | mmu = 1; | ||
237 | if (no_iommu || dma_mask < 0xffffffffUL) { | ||
238 | if (high) { | ||
239 | free_pages((unsigned long)memory, | ||
240 | get_order(size)); | ||
241 | |||
242 | if (swiotlb) { | ||
243 | return | ||
244 | swiotlb_alloc_coherent(dev, size, | ||
245 | dma_handle, | ||
246 | gfp); | ||
247 | } | ||
248 | |||
249 | if (!(gfp & GFP_DMA)) { | ||
250 | gfp |= GFP_DMA; | ||
251 | goto again; | ||
252 | } | ||
253 | return NULL; | ||
254 | } | ||
255 | mmu = 0; | ||
256 | } | ||
257 | memset(memory, 0, size); | ||
258 | if (!mmu) { | ||
259 | *dma_handle = virt_to_bus(memory); | ||
260 | return memory; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | *dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0); | ||
265 | if (*dma_handle == bad_dma_address) | ||
266 | goto error; | ||
267 | flush_gart(dev); | ||
268 | return memory; | ||
269 | |||
270 | error: | ||
271 | if (panic_on_overflow) | ||
272 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size); | ||
273 | free_pages((unsigned long)memory, get_order(size)); | ||
274 | return NULL; | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Unmap coherent memory. | ||
279 | * The caller must ensure that the device has finished accessing the mapping. | ||
280 | */ | ||
281 | void dma_free_coherent(struct device *dev, size_t size, | ||
282 | void *vaddr, dma_addr_t bus) | ||
283 | { | ||
284 | if (swiotlb) { | ||
285 | swiotlb_free_coherent(dev, size, vaddr, bus); | ||
286 | return; | ||
287 | } | ||
288 | |||
289 | dma_unmap_single(dev, bus, size, 0); | ||
290 | free_pages((unsigned long)vaddr, get_order(size)); | ||
291 | } | ||
292 | |||
#ifdef CONFIG_IOMMU_LEAK

/* Record/clear the caller that owns a GART page, for leak diagnosis. */
#define SET_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = NULL;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;
void dump_leak(void)
{
	int i;
	static int dump;	/* only dump once per boot */
	if (dump || !iommu_leak_tab) return;
	dump = 1;
	show_stack(NULL,NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i+=2) {
		/* Last valid slot is iommu_pages-1; the old code indexed
		   iommu_pages-i and so read one past the table on the
		   first iteration. */
		printk("%lu: ", iommu_pages-i-1);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i-1]);
		/* Two entries per line (the old (i+1)%2 test never fired
		   because i steps by 2). */
		printk("%c", i % 4 == 2 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif
324 | |||
325 | static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) | ||
326 | { | ||
327 | /* | ||
328 | * Ran out of IOMMU space for this operation. This is very bad. | ||
329 | * Unfortunately the drivers cannot handle this operation properly. | ||
330 | * Return some non mapped prereserved space in the aperture and | ||
331 | * let the Northbridge deal with it. This will result in garbage | ||
332 | * in the IO operation. When the size exceeds the prereserved space | ||
333 | * memory corruption will occur or random memory will be DMAed | ||
334 | * out. Hopefully no network devices use single mappings that big. | ||
335 | */ | ||
336 | |||
337 | printk(KERN_ERR | ||
338 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | ||
339 | size, dev->bus_id); | ||
340 | |||
341 | if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) { | ||
342 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | ||
343 | panic("PCI-DMA: Memory would be corrupted\n"); | ||
344 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) | ||
345 | panic("PCI-DMA: Random memory would be DMAed\n"); | ||
346 | } | ||
347 | |||
348 | #ifdef CONFIG_IOMMU_LEAK | ||
349 | dump_leak(); | ||
350 | #endif | ||
351 | } | ||
352 | |||
353 | static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) | ||
354 | { | ||
355 | u64 mask = *dev->dma_mask; | ||
356 | int high = addr + size >= mask; | ||
357 | int mmu = high; | ||
358 | if (force_iommu) | ||
359 | mmu = 1; | ||
360 | if (no_iommu) { | ||
361 | if (high) | ||
362 | panic("PCI-DMA: high address but no IOMMU.\n"); | ||
363 | mmu = 0; | ||
364 | } | ||
365 | return mmu; | ||
366 | } | ||
367 | |||
368 | static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | ||
369 | { | ||
370 | u64 mask = *dev->dma_mask; | ||
371 | int high = addr + size >= mask; | ||
372 | int mmu = high; | ||
373 | if (no_iommu) { | ||
374 | if (high) | ||
375 | panic("PCI-DMA: high address but no IOMMU.\n"); | ||
376 | mmu = 0; | ||
377 | } | ||
378 | return mmu; | ||
379 | } | ||
380 | |||
/* Map a single continuous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 *
 * Returns the bus address inside the remap window, the unchanged
 * physical address when no remapping is needed, or bad_dma_address
 * on overflow.
 */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
				size_t size, int dir, int do_panic)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages);
	int i;
	if (iommu_page == -1) {
		/* Out of GART pages: fall back to the direct address when
		   the device can actually reach it. */
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir, do_panic);
		return bad_dma_address;
	}

	/* Fill one GART PTE per page of the area. */
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	/* Page-sized increments above preserve the low bits, so the
	   in-page offset of the original phys_mem is still intact here. */
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
406 | |||
407 | /* Map a single area into the IOMMU */ | ||
408 | dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) | ||
409 | { | ||
410 | unsigned long phys_mem, bus; | ||
411 | |||
412 | BUG_ON(dir == DMA_NONE); | ||
413 | |||
414 | if (swiotlb) | ||
415 | return swiotlb_map_single(dev,addr,size,dir); | ||
416 | if (!dev) | ||
417 | dev = &fallback_dev; | ||
418 | |||
419 | phys_mem = virt_to_phys(addr); | ||
420 | if (!need_iommu(dev, phys_mem, size)) | ||
421 | return phys_mem; | ||
422 | |||
423 | bus = dma_map_area(dev, phys_mem, size, dir, 1); | ||
424 | flush_gart(dev); | ||
425 | return bus; | ||
426 | } | ||
427 | |||
/* Fallback for dma_map_sg in case of overflow.
 *
 * Maps each entry individually (no merging, no forcing); entries that
 * the device can reach directly keep their physical address. On a GART
 * overflow the already-mapped prefix is unwound and 0 is returned with
 * sg[0].dma_length cleared.
 */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

 	for (i = 0; i < nents; i++ ) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			/* do_panic=0: report failure to the caller instead */
			addr = dma_map_area(dev, addr, s->length, dir, 0);
			if (addr == bad_dma_address) {
				if (i > 0)
					dma_unmap_sg(dev, sg, i, dir);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart(dev);
	return nents;
}
457 | |||
458 | /* Map multiple scatterlist entries continuous into the first. */ | ||
459 | static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, | ||
460 | struct scatterlist *sout, unsigned long pages) | ||
461 | { | ||
462 | unsigned long iommu_start = alloc_iommu(pages); | ||
463 | unsigned long iommu_page = iommu_start; | ||
464 | int i; | ||
465 | |||
466 | if (iommu_start == -1) | ||
467 | return -1; | ||
468 | |||
469 | for (i = start; i < stopat; i++) { | ||
470 | struct scatterlist *s = &sg[i]; | ||
471 | unsigned long pages, addr; | ||
472 | unsigned long phys_addr = s->dma_address; | ||
473 | |||
474 | BUG_ON(i > start && s->offset); | ||
475 | if (i == start) { | ||
476 | *sout = *s; | ||
477 | sout->dma_address = iommu_bus_base; | ||
478 | sout->dma_address += iommu_page*PAGE_SIZE + s->offset; | ||
479 | sout->dma_length = s->length; | ||
480 | } else { | ||
481 | sout->dma_length += s->length; | ||
482 | } | ||
483 | |||
484 | addr = phys_addr; | ||
485 | pages = to_pages(s->offset, s->length); | ||
486 | while (pages--) { | ||
487 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | ||
488 | SET_LEAK(iommu_page); | ||
489 | addr += PAGE_SIZE; | ||
490 | iommu_page++; | ||
491 | } | ||
492 | } | ||
493 | BUG_ON(iommu_page - iommu_start != pages); | ||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, | ||
498 | struct scatterlist *sout, | ||
499 | unsigned long pages, int need) | ||
500 | { | ||
501 | if (!need) { | ||
502 | BUG_ON(stopat - start != 1); | ||
503 | *sout = sg[start]; | ||
504 | sout->dma_length = sg[start].length; | ||
505 | return 0; | ||
506 | } | ||
507 | return __dma_map_cont(sg, start, stopat, sout, pages); | ||
508 | } | ||
509 | |||
510 | /* | ||
511 | * DMA map all entries in a scatterlist. | ||
512 | * Merge chunks that have page aligned sizes into a continuous mapping. | ||
513 | */ | ||
514 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
515 | { | ||
516 | int i; | ||
517 | int out; | ||
518 | int start; | ||
519 | unsigned long pages = 0; | ||
520 | int need = 0, nextneed; | ||
521 | |||
522 | BUG_ON(dir == DMA_NONE); | ||
523 | if (nents == 0) | ||
524 | return 0; | ||
525 | |||
526 | if (swiotlb) | ||
527 | return swiotlb_map_sg(dev,sg,nents,dir); | ||
528 | if (!dev) | ||
529 | dev = &fallback_dev; | ||
530 | |||
531 | out = 0; | ||
532 | start = 0; | ||
533 | for (i = 0; i < nents; i++) { | ||
534 | struct scatterlist *s = &sg[i]; | ||
535 | dma_addr_t addr = page_to_phys(s->page) + s->offset; | ||
536 | s->dma_address = addr; | ||
537 | BUG_ON(s->length == 0); | ||
538 | |||
539 | nextneed = need_iommu(dev, addr, s->length); | ||
540 | |||
541 | /* Handle the previous not yet processed entries */ | ||
542 | if (i > start) { | ||
543 | struct scatterlist *ps = &sg[i-1]; | ||
544 | /* Can only merge when the last chunk ends on a page | ||
545 | boundary and the new one doesn't have an offset. */ | ||
546 | if (!iommu_merge || !nextneed || !need || s->offset || | ||
547 | (ps->offset + ps->length) % PAGE_SIZE) { | ||
548 | if (dma_map_cont(sg, start, i, sg+out, pages, | ||
549 | need) < 0) | ||
550 | goto error; | ||
551 | out++; | ||
552 | pages = 0; | ||
553 | start = i; | ||
554 | } | ||
555 | } | ||
556 | |||
557 | need = nextneed; | ||
558 | pages += to_pages(s->offset, s->length); | ||
559 | } | ||
560 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) | ||
561 | goto error; | ||
562 | out++; | ||
563 | flush_gart(dev); | ||
564 | if (out < nents) | ||
565 | sg[out].dma_length = 0; | ||
566 | return out; | ||
567 | |||
568 | error: | ||
569 | flush_gart(NULL); | ||
570 | dma_unmap_sg(dev, sg, nents, dir); | ||
571 | /* When it was forced try again unforced */ | ||
572 | if (force_iommu) | ||
573 | return dma_map_sg_nonforce(dev, sg, nents, dir); | ||
574 | if (panic_on_overflow) | ||
575 | panic("dma_map_sg: overflow on %lu pages\n", pages); | ||
576 | iommu_full(dev, pages << PAGE_SHIFT, dir, 0); | ||
577 | for (i = 0; i < nents; i++) | ||
578 | sg[i].dma_address = bad_dma_address; | ||
579 | return 0; | ||
580 | } | ||
581 | |||
582 | /* | ||
583 | * Free a DMA mapping. | ||
584 | */ | ||
585 | void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, | ||
586 | size_t size, int direction) | ||
587 | { | ||
588 | unsigned long iommu_page; | ||
589 | int npages; | ||
590 | int i; | ||
591 | |||
592 | if (swiotlb) { | ||
593 | swiotlb_unmap_single(dev,dma_addr,size,direction); | ||
594 | return; | ||
595 | } | ||
596 | |||
597 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | ||
598 | dma_addr >= iommu_bus_base + iommu_size) | ||
599 | return; | ||
600 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | ||
601 | npages = to_pages(dma_addr, size); | ||
602 | for (i = 0; i < npages; i++) { | ||
603 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | ||
604 | CLEAR_LEAK(iommu_page + i); | ||
605 | } | ||
606 | free_iommu(iommu_page, npages); | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * Wrapper for pci_unmap_single working with scatterlists. | ||
611 | */ | ||
612 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
613 | { | ||
614 | int i; | ||
615 | if (swiotlb) { | ||
616 | swiotlb_unmap_sg(dev,sg,nents,dir); | ||
617 | return; | ||
618 | } | ||
619 | for (i = 0; i < nents; i++) { | ||
620 | struct scatterlist *s = &sg[i]; | ||
621 | if (!s->dma_length || !s->length) | ||
622 | break; | ||
623 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
624 | } | ||
625 | } | ||
626 | |||
627 | int dma_supported(struct device *dev, u64 mask) | ||
628 | { | ||
629 | /* Copied from i386. Doesn't make much sense, because it will | ||
630 | only work for pci_alloc_coherent. | ||
631 | The caller just has to use GFP_DMA in this case. */ | ||
632 | if (mask < 0x00ffffff) | ||
633 | return 0; | ||
634 | |||
635 | /* Tell the device to use SAC when IOMMU force is on. | ||
636 | This allows the driver to use cheaper accesses in some cases. | ||
637 | |||
638 | Problem with this is that if we overflow the IOMMU area | ||
639 | and return DAC as fallback address the device may not handle it correctly. | ||
640 | |||
641 | As a special case some controllers have a 39bit address mode | ||
642 | that is as efficient as 32bit (aic79xx). Don't force SAC for these. | ||
643 | Assume all masks <= 40 bits are of this type. Normally this doesn't | ||
644 | make any difference, but gives more gentle handling of IOMMU overflow. */ | ||
645 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
646 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
647 | return 0; | ||
648 | } | ||
649 | |||
650 | return 1; | ||
651 | } | ||
652 | |||
/* Report the DMA buffer alignment requirement: the CPU's CLFLUSH
   line size as detected at boot. */
int dma_get_cache_alignment(void)
{
	return boot_cpu_data.x86_clflush_size;
}
657 | |||
658 | EXPORT_SYMBOL(dma_unmap_sg); | ||
659 | EXPORT_SYMBOL(dma_map_sg); | ||
660 | EXPORT_SYMBOL(dma_map_single); | ||
661 | EXPORT_SYMBOL(dma_unmap_single); | ||
662 | EXPORT_SYMBOL(dma_supported); | ||
663 | EXPORT_SYMBOL(no_iommu); | ||
664 | EXPORT_SYMBOL(force_iommu); | ||
665 | EXPORT_SYMBOL(bad_dma_address); | ||
666 | EXPORT_SYMBOL(iommu_bio_merge); | ||
667 | EXPORT_SYMBOL(iommu_sac_force); | ||
668 | EXPORT_SYMBOL(dma_get_cache_alignment); | ||
669 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
670 | EXPORT_SYMBOL(dma_free_coherent); | ||
671 | |||
672 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | ||
673 | { | ||
674 | unsigned long a; | ||
675 | if (!iommu_size) { | ||
676 | iommu_size = aper_size; | ||
677 | if (!no_agp) | ||
678 | iommu_size /= 2; | ||
679 | } | ||
680 | |||
681 | a = aper + iommu_size; | ||
682 | iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; | ||
683 | |||
684 | if (iommu_size < 64*1024*1024) | ||
685 | printk(KERN_WARNING | ||
686 | "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); | ||
687 | |||
688 | return iommu_size; | ||
689 | } | ||
690 | |||
/* Read the GART aperture base and size from a northbridge.
 * Stores the size in *size and returns the base, or 0 when the
 * aperture is unusable (missing, or extending past 4GB).
 * Register layout per the code below: 0x90 carries the aperture order
 * in bits 3:1, 0x94 carries base address bits 39:25 in its low 15 bits. */
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	/* size = 32MB << order */
	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
711 | |||
712 | /* | ||
713 | * Private Northbridge GATT initialization in case we cannot use the | ||
714 | * AGP driver for some reason. | ||
715 | */ | ||
716 | static __init int init_k8_gatt(struct agp_kern_info *info) | ||
717 | { | ||
718 | struct pci_dev *dev; | ||
719 | void *gatt; | ||
720 | unsigned aper_base, new_aper_base; | ||
721 | unsigned aper_size, gatt_size, new_aper_size; | ||
722 | |||
723 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | ||
724 | aper_size = aper_base = info->aper_size = 0; | ||
725 | for_all_nb(dev) { | ||
726 | new_aper_base = read_aperture(dev, &new_aper_size); | ||
727 | if (!new_aper_base) | ||
728 | goto nommu; | ||
729 | |||
730 | if (!aper_base) { | ||
731 | aper_size = new_aper_size; | ||
732 | aper_base = new_aper_base; | ||
733 | } | ||
734 | if (aper_size != new_aper_size || aper_base != new_aper_base) | ||
735 | goto nommu; | ||
736 | } | ||
737 | if (!aper_base) | ||
738 | goto nommu; | ||
739 | info->aper_base = aper_base; | ||
740 | info->aper_size = aper_size>>20; | ||
741 | |||
742 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | ||
743 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | ||
744 | if (!gatt) | ||
745 | panic("Cannot allocate GATT table"); | ||
746 | memset(gatt, 0, gatt_size); | ||
747 | agp_gatt_table = gatt; | ||
748 | |||
749 | for_all_nb(dev) { | ||
750 | u32 ctl; | ||
751 | u32 gatt_reg; | ||
752 | |||
753 | gatt_reg = __pa(gatt) >> 12; | ||
754 | gatt_reg <<= 4; | ||
755 | pci_write_config_dword(dev, 0x98, gatt_reg); | ||
756 | pci_read_config_dword(dev, 0x90, &ctl); | ||
757 | |||
758 | ctl |= 1; | ||
759 | ctl &= ~((1<<4) | (1<<5)); | ||
760 | |||
761 | pci_write_config_dword(dev, 0x90, ctl); | ||
762 | } | ||
763 | flush_gart(NULL); | ||
764 | |||
765 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); | ||
766 | return 0; | ||
767 | |||
768 | nommu: | ||
769 | /* Should not happen anymore */ | ||
770 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | ||
771 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction."); | ||
772 | return -1; | ||
773 | } | ||
774 | |||
extern int agp_amd64_init(void);

/*
 * Initialize the GART IOMMU: size the remap window, allocate the
 * allocation bitmap, reserve the emergency pages, unmap the kernel
 * alias of the window, fill unused GATT entries with a scratch page,
 * and cache each northbridge's flush word for flush_gart().
 * Returns -1 when the IOMMU is disabled (swiotlb, no aperture, small
 * memory, or GATT setup failure).
 */
static int __init pci_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;
	unsigned long scratch;
	long i;

#ifndef CONFIG_AGP_AMD64
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
		(agp_copy_info(agp_bridge, &info) < 0);
#endif

	if (swiotlb) {
		no_iommu = 1;
		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
		return -1;
	}

	/* Skip the IOMMU when not needed (all RAM below 4GB and not
	   forced), unavailable, or when GATT setup failed. */
	if (no_iommu ||
	    (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT) ||
	    !iommu_aperture ||
	    (no_agp && init_k8_gatt(&info) < 0)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return -1;
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	/* One bit per GART page. */
	iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
						    get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	/* The remap window lives at the top of the aperture. */
	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	/*
	 * Try to workaround a bug (thanks to BenH)
	 * Set unmapped entries to a scratch page instead of 0.
	 * Any prefetches that hit unmapped entries won't get an bus abort
	 * then.
	 */
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
		iommu_gatt_base[i] = gart_unmapped_entry;

	/* Cache the flush control word of each northbridge; slot number
	   minus 24 gives the node index. */
	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= MAX_NB)
			continue;
		northbridges[cpu] = dev;
		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	flush_gart(NULL);

	return 0;
}

/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
889 | |||
890 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | ||
891 | [,forcesac][,fullflush][,nomerge][,biomerge] | ||
892 | size set size of iommu (in bytes) | ||
893 | noagp don't initialize the AGP driver and use full aperture. | ||
894 | off don't use the IOMMU | ||
895 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
896 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
897 | noforce don't force IOMMU usage. Default. | ||
898 | force Force IOMMU. | ||
899 | merge Do lazy merging. This may improve performance on some block devices. | ||
900 | Implies force (experimental) | ||
901 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
902 | but should be only done with very big IOMMUs. Implies merge,force. | ||
903 | nomerge Don't do SG merging. | ||
904 | forcesac For SAC mode for masks <40bits (experimental) | ||
905 | fullflush Flush IOMMU on each allocation (default) | ||
906 | nofullflush Don't use IOMMU fullflush | ||
907 | allowed overwrite iommu off workarounds for specific chipsets. | ||
908 | soft Use software bounce buffering (default for Intel machines) | ||
909 | noaperture Don't touch the aperture for AGP. | ||
910 | */ | ||
911 | __init int iommu_setup(char *p) | ||
912 | { | ||
913 | int arg; | ||
914 | |||
915 | while (*p) { | ||
916 | if (!strncmp(p,"noagp",5)) | ||
917 | no_agp = 1; | ||
918 | if (!strncmp(p,"off",3)) | ||
919 | no_iommu = 1; | ||
920 | if (!strncmp(p,"force",5)) { | ||
921 | force_iommu = 1; | ||
922 | iommu_aperture_allowed = 1; | ||
923 | } | ||
924 | if (!strncmp(p,"allowed",7)) | ||
925 | iommu_aperture_allowed = 1; | ||
926 | if (!strncmp(p,"noforce",7)) { | ||
927 | iommu_merge = 0; | ||
928 | force_iommu = 0; | ||
929 | } | ||
930 | if (!strncmp(p, "memaper", 7)) { | ||
931 | fallback_aper_force = 1; | ||
932 | p += 7; | ||
933 | if (*p == '=') { | ||
934 | ++p; | ||
935 | if (get_option(&p, &arg)) | ||
936 | fallback_aper_order = arg; | ||
937 | } | ||
938 | } | ||
939 | if (!strncmp(p, "biomerge",8)) { | ||
940 | iommu_bio_merge = 4096; | ||
941 | iommu_merge = 1; | ||
942 | force_iommu = 1; | ||
943 | } | ||
944 | if (!strncmp(p, "panic",5)) | ||
945 | panic_on_overflow = 1; | ||
946 | if (!strncmp(p, "nopanic",7)) | ||
947 | panic_on_overflow = 0; | ||
948 | if (!strncmp(p, "merge",5)) { | ||
949 | iommu_merge = 1; | ||
950 | force_iommu = 1; | ||
951 | } | ||
952 | if (!strncmp(p, "nomerge",7)) | ||
953 | iommu_merge = 0; | ||
954 | if (!strncmp(p, "forcesac",8)) | ||
955 | iommu_sac_force = 1; | ||
956 | if (!strncmp(p, "fullflush",8)) | ||
957 | iommu_fullflush = 1; | ||
958 | if (!strncmp(p, "nofullflush",11)) | ||
959 | iommu_fullflush = 0; | ||
960 | if (!strncmp(p, "soft",4)) | ||
961 | swiotlb = 1; | ||
962 | if (!strncmp(p, "noaperture",10)) | ||
963 | fix_aperture = 0; | ||
964 | #ifdef CONFIG_IOMMU_LEAK | ||
965 | if (!strncmp(p,"leak",4)) { | ||
966 | leak_trace = 1; | ||
967 | p += 4; | ||
968 | if (*p == '=') ++p; | ||
969 | if (isdigit(*p) && get_option(&p, &arg)) | ||
970 | iommu_leak_pages = arg; | ||
971 | } else | ||
972 | #endif | ||
973 | if (isdigit(*p) && get_option(&p, &arg)) | ||
974 | iommu_size = arg; | ||
975 | p += strcspn(p, ","); | ||
976 | if (*p == ',') | ||
977 | ++p; | ||
978 | } | ||
979 | return 1; | ||
980 | } | ||