aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel/pci-gart.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel/pci-gart.c')
-rw-r--r--arch/x86_64/kernel/pci-gart.c980
1 files changed, 980 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
new file mode 100644
index 000000000000..57f35c68aa34
--- /dev/null
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -0,0 +1,980 @@
1/*
2 * Dynamic DMA mapping support for AMD Hammer.
3 *
4 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
5 * This allows to use PCI devices that only support 32bit addresses on systems
6 * with more than 4GB.
7 *
8 * See Documentation/DMA-mapping.txt for the interface specification.
9 *
10 * Copyright 2002 Andi Kleen, SuSE Labs.
11 */
12
13#include <linux/config.h>
14#include <linux/types.h>
15#include <linux/ctype.h>
16#include <linux/agp_backend.h>
17#include <linux/init.h>
18#include <linux/mm.h>
19#include <linux/string.h>
20#include <linux/spinlock.h>
21#include <linux/pci.h>
22#include <linux/module.h>
23#include <linux/topology.h>
24#include <linux/interrupt.h>
25#include <linux/bitops.h>
26#include <asm/atomic.h>
27#include <asm/io.h>
28#include <asm/mtrr.h>
29#include <asm/pgtable.h>
30#include <asm/proto.h>
31#include <asm/cacheflush.h>
32#include <asm/kdebug.h>
33
34dma_addr_t bad_dma_address;
35
36unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */
38static unsigned long iommu_pages; /* .. and in pages */
39
40u32 *iommu_gatt_base; /* Remapping table */
41
42int no_iommu;
43static int no_agp;
44#ifdef CONFIG_IOMMU_DEBUG
45int panic_on_overflow = 1;
46int force_iommu = 1;
47#else
48int panic_on_overflow = 0;
49int force_iommu = 0;
50#endif
51int iommu_merge = 1;
52int iommu_sac_force = 0;
53
/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. With it true the GART is flushed
   for every mapping. The problem is that doing the lazy flush seems to trigger
   bugs with some popular PCI cards, in particular 3ware (but it has also
   been seen with Qlogic at least). */
59int iommu_fullflush = 1;
60
61/* This tells the BIO block layer to assume merging. Default to off
62 because we cannot guarantee merging later. */
63int iommu_bio_merge = 0;
64
65#define MAX_NB 8
66
67/* Allocation bitmap for the remapping area */
68static DEFINE_SPINLOCK(iommu_bitmap_lock);
69static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
70
71static u32 gart_unmapped_entry;
72
73#define GPTE_VALID 1
74#define GPTE_COHERENT 2
75#define GPTE_ENCODE(x) \
76 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
77#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
78
79#define to_pages(addr,size) \
80 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
81
82#define for_all_nb(dev) \
83 dev = NULL; \
84 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
85 if (dev->bus->number == 0 && \
86 (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
87
88static struct pci_dev *northbridges[MAX_NB];
89static u32 northbridge_flush_word[MAX_NB];
90
91#define EMERGENCY_PAGES 32 /* = 128KB */
92
93#ifdef CONFIG_AGP
94#define AGPEXTERN extern
95#else
96#define AGPEXTERN
97#endif
98
99/* backdoor interface to AGP driver */
100AGPEXTERN int agp_memory_reserved;
101AGPEXTERN __u32 *agp_gatt_table;
102
103static unsigned long next_bit; /* protected by iommu_bitmap_lock */
104static int need_flush; /* global flush state. set for each gart wrap */
105static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
106 size_t size, int dir, int do_panic);
107
108/* Dummy device used for NULL arguments (normally ISA). Better would
109 be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */
110static struct device fallback_dev = {
111 .bus_id = "fallback device",
112 .coherent_dma_mask = 0xffffffff,
113 .dma_mask = &fallback_dev.coherent_dma_mask,
114};
115
116static unsigned long alloc_iommu(int size)
117{
118 unsigned long offset, flags;
119
120 spin_lock_irqsave(&iommu_bitmap_lock, flags);
121 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
122 if (offset == -1) {
123 need_flush = 1;
124 offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
125 }
126 if (offset != -1) {
127 set_bit_string(iommu_gart_bitmap, offset, size);
128 next_bit = offset+size;
129 if (next_bit >= iommu_pages) {
130 next_bit = 0;
131 need_flush = 1;
132 }
133 }
134 if (iommu_fullflush)
135 need_flush = 1;
136 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
137 return offset;
138}
139
140static void free_iommu(unsigned long offset, int size)
141{
142 unsigned long flags;
143 if (size == 1) {
144 clear_bit(offset, iommu_gart_bitmap);
145 return;
146 }
147 spin_lock_irqsave(&iommu_bitmap_lock, flags);
148 __clear_bit_string(iommu_gart_bitmap, offset, size);
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
150}
151
152/*
153 * Use global flush state to avoid races with multiple flushers.
154 */
155static void flush_gart(struct device *dev)
156{
157 unsigned long flags;
158 int flushed = 0;
159 int i, max;
160
161 spin_lock_irqsave(&iommu_bitmap_lock, flags);
162 if (need_flush) {
163 max = 0;
164 for (i = 0; i < MAX_NB; i++) {
165 if (!northbridges[i])
166 continue;
167 pci_write_config_dword(northbridges[i], 0x9c,
168 northbridge_flush_word[i] | 1);
169 flushed++;
170 max = i;
171 }
172 for (i = 0; i <= max; i++) {
173 u32 w;
174 if (!northbridges[i])
175 continue;
176 /* Make sure the hardware actually executed the flush. */
177 do {
178 pci_read_config_dword(northbridges[i], 0x9c, &w);
179 } while (w & 1);
180 }
181 if (!flushed)
182 printk("nothing to flush?\n");
183 need_flush = 0;
184 }
185 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
186}
187
188/* Allocate DMA memory on node near device */
189noinline
190static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
191{
192 struct page *page;
193 int node;
194 if (dev->bus == &pci_bus_type) {
195 cpumask_t mask;
196 mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
197 node = cpu_to_node(first_cpu(mask));
198 } else
199 node = numa_node_id();
200 page = alloc_pages_node(node, gfp, order);
201 return page ? page_address(page) : NULL;
202}
203
204/*
205 * Allocate memory for a coherent mapping.
206 */
207void *
208dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
209 unsigned gfp)
210{
211 void *memory;
212 unsigned long dma_mask = 0;
213 u64 bus;
214
215 if (!dev)
216 dev = &fallback_dev;
217 dma_mask = dev->coherent_dma_mask;
218 if (dma_mask == 0)
219 dma_mask = 0xffffffff;
220
221 /* Kludge to make it bug-to-bug compatible with i386. i386
222 uses the normal dma_mask for alloc_coherent. */
223 dma_mask &= *dev->dma_mask;
224
225 again:
226 memory = dma_alloc_pages(dev, gfp, get_order(size));
227 if (memory == NULL)
228 return NULL;
229
230 {
231 int high, mmu;
232 bus = virt_to_bus(memory);
233 high = (bus + size) >= dma_mask;
234 mmu = high;
235 if (force_iommu && !(gfp & GFP_DMA))
236 mmu = 1;
237 if (no_iommu || dma_mask < 0xffffffffUL) {
238 if (high) {
239 free_pages((unsigned long)memory,
240 get_order(size));
241
242 if (swiotlb) {
243 return
244 swiotlb_alloc_coherent(dev, size,
245 dma_handle,
246 gfp);
247 }
248
249 if (!(gfp & GFP_DMA)) {
250 gfp |= GFP_DMA;
251 goto again;
252 }
253 return NULL;
254 }
255 mmu = 0;
256 }
257 memset(memory, 0, size);
258 if (!mmu) {
259 *dma_handle = virt_to_bus(memory);
260 return memory;
261 }
262 }
263
264 *dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0);
265 if (*dma_handle == bad_dma_address)
266 goto error;
267 flush_gart(dev);
268 return memory;
269
270error:
271 if (panic_on_overflow)
272 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size);
273 free_pages((unsigned long)memory, get_order(size));
274 return NULL;
275}
276
277/*
278 * Unmap coherent memory.
279 * The caller must ensure that the device has finished accessing the mapping.
280 */
281void dma_free_coherent(struct device *dev, size_t size,
282 void *vaddr, dma_addr_t bus)
283{
284 if (swiotlb) {
285 swiotlb_free_coherent(dev, size, vaddr, bus);
286 return;
287 }
288
289 dma_unmap_single(dev, bus, size, 0);
290 free_pages((unsigned long)vaddr, get_order(size));
291}
292
293#ifdef CONFIG_IOMMU_LEAK
294
295#define SET_LEAK(x) if (iommu_leak_tab) \
296 iommu_leak_tab[x] = __builtin_return_address(0);
297#define CLEAR_LEAK(x) if (iommu_leak_tab) \
298 iommu_leak_tab[x] = NULL;
299
300/* Debugging aid for drivers that don't free their IOMMU tables */
301static void **iommu_leak_tab;
302static int leak_trace;
303int iommu_leak_pages = 20;
304void dump_leak(void)
305{
306 int i;
307 static int dump;
308 if (dump || !iommu_leak_tab) return;
309 dump = 1;
310 show_stack(NULL,NULL);
311 /* Very crude. dump some from the end of the table too */
312 printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
313 for (i = 0; i < iommu_leak_pages; i+=2) {
314 printk("%lu: ", iommu_pages-i);
315 printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
316 printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
317 }
318 printk("\n");
319}
320#else
321#define SET_LEAK(x)
322#define CLEAR_LEAK(x)
323#endif
324
325static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
326{
327 /*
328 * Ran out of IOMMU space for this operation. This is very bad.
329 * Unfortunately the drivers cannot handle this operation properly.
330 * Return some non mapped prereserved space in the aperture and
331 * let the Northbridge deal with it. This will result in garbage
332 * in the IO operation. When the size exceeds the prereserved space
333 * memory corruption will occur or random memory will be DMAed
334 * out. Hopefully no network devices use single mappings that big.
335 */
336
337 printk(KERN_ERR
338 "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
339 size, dev->bus_id);
340
341 if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) {
342 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
343 panic("PCI-DMA: Memory would be corrupted\n");
344 if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
345 panic("PCI-DMA: Random memory would be DMAed\n");
346 }
347
348#ifdef CONFIG_IOMMU_LEAK
349 dump_leak();
350#endif
351}
352
353static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
354{
355 u64 mask = *dev->dma_mask;
356 int high = addr + size >= mask;
357 int mmu = high;
358 if (force_iommu)
359 mmu = 1;
360 if (no_iommu) {
361 if (high)
362 panic("PCI-DMA: high address but no IOMMU.\n");
363 mmu = 0;
364 }
365 return mmu;
366}
367
368static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
369{
370 u64 mask = *dev->dma_mask;
371 int high = addr + size >= mask;
372 int mmu = high;
373 if (no_iommu) {
374 if (high)
375 panic("PCI-DMA: high address but no IOMMU.\n");
376 mmu = 0;
377 }
378 return mmu;
379}
380
381/* Map a single continuous physical area into the IOMMU.
382 * Caller needs to check if the iommu is needed and flush.
383 */
384static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
385 size_t size, int dir, int do_panic)
386{
387 unsigned long npages = to_pages(phys_mem, size);
388 unsigned long iommu_page = alloc_iommu(npages);
389 int i;
390 if (iommu_page == -1) {
391 if (!nonforced_iommu(dev, phys_mem, size))
392 return phys_mem;
393 if (panic_on_overflow)
394 panic("dma_map_area overflow %lu bytes\n", size);
395 iommu_full(dev, size, dir, do_panic);
396 return bad_dma_address;
397 }
398
399 for (i = 0; i < npages; i++) {
400 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
401 SET_LEAK(iommu_page + i);
402 phys_mem += PAGE_SIZE;
403 }
404 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
405}
406
407/* Map a single area into the IOMMU */
408dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
409{
410 unsigned long phys_mem, bus;
411
412 BUG_ON(dir == DMA_NONE);
413
414 if (swiotlb)
415 return swiotlb_map_single(dev,addr,size,dir);
416 if (!dev)
417 dev = &fallback_dev;
418
419 phys_mem = virt_to_phys(addr);
420 if (!need_iommu(dev, phys_mem, size))
421 return phys_mem;
422
423 bus = dma_map_area(dev, phys_mem, size, dir, 1);
424 flush_gart(dev);
425 return bus;
426}
427
428/* Fallback for dma_map_sg in case of overflow */
429static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
430 int nents, int dir)
431{
432 int i;
433
434#ifdef CONFIG_IOMMU_DEBUG
435 printk(KERN_DEBUG "dma_map_sg overflow\n");
436#endif
437
438 for (i = 0; i < nents; i++ ) {
439 struct scatterlist *s = &sg[i];
440 unsigned long addr = page_to_phys(s->page) + s->offset;
441 if (nonforced_iommu(dev, addr, s->length)) {
442 addr = dma_map_area(dev, addr, s->length, dir, 0);
443 if (addr == bad_dma_address) {
444 if (i > 0)
445 dma_unmap_sg(dev, sg, i, dir);
446 nents = 0;
447 sg[0].dma_length = 0;
448 break;
449 }
450 }
451 s->dma_address = addr;
452 s->dma_length = s->length;
453 }
454 flush_gart(dev);
455 return nents;
456}
457
458/* Map multiple scatterlist entries continuous into the first. */
459static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
460 struct scatterlist *sout, unsigned long pages)
461{
462 unsigned long iommu_start = alloc_iommu(pages);
463 unsigned long iommu_page = iommu_start;
464 int i;
465
466 if (iommu_start == -1)
467 return -1;
468
469 for (i = start; i < stopat; i++) {
470 struct scatterlist *s = &sg[i];
471 unsigned long pages, addr;
472 unsigned long phys_addr = s->dma_address;
473
474 BUG_ON(i > start && s->offset);
475 if (i == start) {
476 *sout = *s;
477 sout->dma_address = iommu_bus_base;
478 sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
479 sout->dma_length = s->length;
480 } else {
481 sout->dma_length += s->length;
482 }
483
484 addr = phys_addr;
485 pages = to_pages(s->offset, s->length);
486 while (pages--) {
487 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
488 SET_LEAK(iommu_page);
489 addr += PAGE_SIZE;
490 iommu_page++;
491 }
492 }
493 BUG_ON(iommu_page - iommu_start != pages);
494 return 0;
495}
496
497static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
498 struct scatterlist *sout,
499 unsigned long pages, int need)
500{
501 if (!need) {
502 BUG_ON(stopat - start != 1);
503 *sout = sg[start];
504 sout->dma_length = sg[start].length;
505 return 0;
506 }
507 return __dma_map_cont(sg, start, stopat, sout, pages);
508}
509
510/*
511 * DMA map all entries in a scatterlist.
512 * Merge chunks that have page aligned sizes into a continuous mapping.
513 */
514int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
515{
516 int i;
517 int out;
518 int start;
519 unsigned long pages = 0;
520 int need = 0, nextneed;
521
522 BUG_ON(dir == DMA_NONE);
523 if (nents == 0)
524 return 0;
525
526 if (swiotlb)
527 return swiotlb_map_sg(dev,sg,nents,dir);
528 if (!dev)
529 dev = &fallback_dev;
530
531 out = 0;
532 start = 0;
533 for (i = 0; i < nents; i++) {
534 struct scatterlist *s = &sg[i];
535 dma_addr_t addr = page_to_phys(s->page) + s->offset;
536 s->dma_address = addr;
537 BUG_ON(s->length == 0);
538
539 nextneed = need_iommu(dev, addr, s->length);
540
541 /* Handle the previous not yet processed entries */
542 if (i > start) {
543 struct scatterlist *ps = &sg[i-1];
544 /* Can only merge when the last chunk ends on a page
545 boundary and the new one doesn't have an offset. */
546 if (!iommu_merge || !nextneed || !need || s->offset ||
547 (ps->offset + ps->length) % PAGE_SIZE) {
548 if (dma_map_cont(sg, start, i, sg+out, pages,
549 need) < 0)
550 goto error;
551 out++;
552 pages = 0;
553 start = i;
554 }
555 }
556
557 need = nextneed;
558 pages += to_pages(s->offset, s->length);
559 }
560 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
561 goto error;
562 out++;
563 flush_gart(dev);
564 if (out < nents)
565 sg[out].dma_length = 0;
566 return out;
567
568error:
569 flush_gart(NULL);
570 dma_unmap_sg(dev, sg, nents, dir);
571 /* When it was forced try again unforced */
572 if (force_iommu)
573 return dma_map_sg_nonforce(dev, sg, nents, dir);
574 if (panic_on_overflow)
575 panic("dma_map_sg: overflow on %lu pages\n", pages);
576 iommu_full(dev, pages << PAGE_SHIFT, dir, 0);
577 for (i = 0; i < nents; i++)
578 sg[i].dma_address = bad_dma_address;
579 return 0;
580}
581
582/*
583 * Free a DMA mapping.
584 */
585void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
586 size_t size, int direction)
587{
588 unsigned long iommu_page;
589 int npages;
590 int i;
591
592 if (swiotlb) {
593 swiotlb_unmap_single(dev,dma_addr,size,direction);
594 return;
595 }
596
597 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
598 dma_addr >= iommu_bus_base + iommu_size)
599 return;
600 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
601 npages = to_pages(dma_addr, size);
602 for (i = 0; i < npages; i++) {
603 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
604 CLEAR_LEAK(iommu_page + i);
605 }
606 free_iommu(iommu_page, npages);
607}
608
609/*
610 * Wrapper for pci_unmap_single working with scatterlists.
611 */
612void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
613{
614 int i;
615 if (swiotlb) {
616 swiotlb_unmap_sg(dev,sg,nents,dir);
617 return;
618 }
619 for (i = 0; i < nents; i++) {
620 struct scatterlist *s = &sg[i];
621 if (!s->dma_length || !s->length)
622 break;
623 dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
624 }
625}
626
627int dma_supported(struct device *dev, u64 mask)
628{
629 /* Copied from i386. Doesn't make much sense, because it will
630 only work for pci_alloc_coherent.
631 The caller just has to use GFP_DMA in this case. */
632 if (mask < 0x00ffffff)
633 return 0;
634
635 /* Tell the device to use SAC when IOMMU force is on.
636 This allows the driver to use cheaper accesses in some cases.
637
638 Problem with this is that if we overflow the IOMMU area
639 and return DAC as fallback address the device may not handle it correctly.
640
641 As a special case some controllers have a 39bit address mode
642 that is as efficient as 32bit (aic79xx). Don't force SAC for these.
643 Assume all masks <= 40 bits are of this type. Normally this doesn't
644 make any difference, but gives more gentle handling of IOMMU overflow. */
645 if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
646 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
647 return 0;
648 }
649
650 return 1;
651}
652
653int dma_get_cache_alignment(void)
654{
655 return boot_cpu_data.x86_clflush_size;
656}
657
658EXPORT_SYMBOL(dma_unmap_sg);
659EXPORT_SYMBOL(dma_map_sg);
660EXPORT_SYMBOL(dma_map_single);
661EXPORT_SYMBOL(dma_unmap_single);
662EXPORT_SYMBOL(dma_supported);
663EXPORT_SYMBOL(no_iommu);
664EXPORT_SYMBOL(force_iommu);
665EXPORT_SYMBOL(bad_dma_address);
666EXPORT_SYMBOL(iommu_bio_merge);
667EXPORT_SYMBOL(iommu_sac_force);
668EXPORT_SYMBOL(dma_get_cache_alignment);
669EXPORT_SYMBOL(dma_alloc_coherent);
670EXPORT_SYMBOL(dma_free_coherent);
671
672static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
673{
674 unsigned long a;
675 if (!iommu_size) {
676 iommu_size = aper_size;
677 if (!no_agp)
678 iommu_size /= 2;
679 }
680
681 a = aper + iommu_size;
682 iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
683
684 if (iommu_size < 64*1024*1024)
685 printk(KERN_WARNING
686 "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);
687
688 return iommu_size;
689}
690
691static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
692{
693 unsigned aper_size = 0, aper_base_32;
694 u64 aper_base;
695 unsigned aper_order;
696
697 pci_read_config_dword(dev, 0x94, &aper_base_32);
698 pci_read_config_dword(dev, 0x90, &aper_order);
699 aper_order = (aper_order >> 1) & 7;
700
701 aper_base = aper_base_32 & 0x7fff;
702 aper_base <<= 25;
703
704 aper_size = (32 * 1024 * 1024) << aper_order;
705 if (aper_base + aper_size >= 0xffffffff || !aper_size)
706 aper_base = 0;
707
708 *size = aper_size;
709 return aper_base;
710}
711
712/*
713 * Private Northbridge GATT initialization in case we cannot use the
714 * AGP driver for some reason.
715 */
716static __init int init_k8_gatt(struct agp_kern_info *info)
717{
718 struct pci_dev *dev;
719 void *gatt;
720 unsigned aper_base, new_aper_base;
721 unsigned aper_size, gatt_size, new_aper_size;
722
723 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
724 aper_size = aper_base = info->aper_size = 0;
725 for_all_nb(dev) {
726 new_aper_base = read_aperture(dev, &new_aper_size);
727 if (!new_aper_base)
728 goto nommu;
729
730 if (!aper_base) {
731 aper_size = new_aper_size;
732 aper_base = new_aper_base;
733 }
734 if (aper_size != new_aper_size || aper_base != new_aper_base)
735 goto nommu;
736 }
737 if (!aper_base)
738 goto nommu;
739 info->aper_base = aper_base;
740 info->aper_size = aper_size>>20;
741
742 gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
743 gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
744 if (!gatt)
745 panic("Cannot allocate GATT table");
746 memset(gatt, 0, gatt_size);
747 agp_gatt_table = gatt;
748
749 for_all_nb(dev) {
750 u32 ctl;
751 u32 gatt_reg;
752
753 gatt_reg = __pa(gatt) >> 12;
754 gatt_reg <<= 4;
755 pci_write_config_dword(dev, 0x98, gatt_reg);
756 pci_read_config_dword(dev, 0x90, &ctl);
757
758 ctl |= 1;
759 ctl &= ~((1<<4) | (1<<5));
760
761 pci_write_config_dword(dev, 0x90, ctl);
762 }
763 flush_gart(NULL);
764
765 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
766 return 0;
767
768 nommu:
769 /* Should not happen anymore */
770 printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
771 KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.");
772 return -1;
773}
774
775extern int agp_amd64_init(void);
776
777static int __init pci_iommu_init(void)
778{
779 struct agp_kern_info info;
780 unsigned long aper_size;
781 unsigned long iommu_start;
782 struct pci_dev *dev;
783 unsigned long scratch;
784 long i;
785
786#ifndef CONFIG_AGP_AMD64
787 no_agp = 1;
788#else
789 /* Makefile puts PCI initialization via subsys_initcall first. */
790 /* Add other K8 AGP bridge drivers here */
791 no_agp = no_agp ||
792 (agp_amd64_init() < 0) ||
793 (agp_copy_info(agp_bridge, &info) < 0);
794#endif
795
796 if (swiotlb) {
797 no_iommu = 1;
798 printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
799 return -1;
800 }
801
802 if (no_iommu ||
803 (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT) ||
804 !iommu_aperture ||
805 (no_agp && init_k8_gatt(&info) < 0)) {
806 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
807 no_iommu = 1;
808 return -1;
809 }
810
811 aper_size = info.aper_size * 1024 * 1024;
812 iommu_size = check_iommu_size(info.aper_base, aper_size);
813 iommu_pages = iommu_size >> PAGE_SHIFT;
814
815 iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
816 get_order(iommu_pages/8));
817 if (!iommu_gart_bitmap)
818 panic("Cannot allocate iommu bitmap\n");
819 memset(iommu_gart_bitmap, 0, iommu_pages/8);
820
821#ifdef CONFIG_IOMMU_LEAK
822 if (leak_trace) {
823 iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
824 get_order(iommu_pages*sizeof(void *)));
825 if (iommu_leak_tab)
826 memset(iommu_leak_tab, 0, iommu_pages * 8);
827 else
828 printk("PCI-DMA: Cannot allocate leak trace area\n");
829 }
830#endif
831
832 /*
833 * Out of IOMMU space handling.
834 * Reserve some invalid pages at the beginning of the GART.
835 */
836 set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
837
838 agp_memory_reserved = iommu_size;
839 printk(KERN_INFO
840 "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
841 iommu_size>>20);
842
843 iommu_start = aper_size - iommu_size;
844 iommu_bus_base = info.aper_base + iommu_start;
845 bad_dma_address = iommu_bus_base;
846 iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
847
848 /*
849 * Unmap the IOMMU part of the GART. The alias of the page is
850 * always mapped with cache enabled and there is no full cache
851 * coherency across the GART remapping. The unmapping avoids
852 * automatic prefetches from the CPU allocating cache lines in
853 * there. All CPU accesses are done via the direct mapping to
854 * the backing memory. The GART address is only used by PCI
855 * devices.
856 */
857 clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
858
859 /*
860 * Try to workaround a bug (thanks to BenH)
861 * Set unmapped entries to a scratch page instead of 0.
862 * Any prefetches that hit unmapped entries won't get an bus abort
863 * then.
864 */
865 scratch = get_zeroed_page(GFP_KERNEL);
866 if (!scratch)
867 panic("Cannot allocate iommu scratch page");
868 gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
869 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
870 iommu_gatt_base[i] = gart_unmapped_entry;
871
872 for_all_nb(dev) {
873 u32 flag;
874 int cpu = PCI_SLOT(dev->devfn) - 24;
875 if (cpu >= MAX_NB)
876 continue;
877 northbridges[cpu] = dev;
878 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
879 northbridge_flush_word[cpu] = flag;
880 }
881
882 flush_gart(NULL);
883
884 return 0;
885}
886
887/* Must execute after PCI subsystem */
888fs_initcall(pci_iommu_init);
889
890/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
891 [,forcesac][,fullflush][,nomerge][,biomerge]
892 size set size of iommu (in bytes)
893 noagp don't initialize the AGP driver and use full aperture.
894 off don't use the IOMMU
895 leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
896 memaper[=order] allocate an own aperture over RAM with size 32MB^order.
897 noforce don't force IOMMU usage. Default.
898 force Force IOMMU.
899 merge Do lazy merging. This may improve performance on some block devices.
900 Implies force (experimental)
901 biomerge Do merging at the BIO layer. This is more efficient than merge,
902 but should be only done with very big IOMMUs. Implies merge,force.
903 nomerge Don't do SG merging.
904 forcesac For SAC mode for masks <40bits (experimental)
905 fullflush Flush IOMMU on each allocation (default)
906 nofullflush Don't use IOMMU fullflush
907 allowed overwrite iommu off workarounds for specific chipsets.
908 soft Use software bounce buffering (default for Intel machines)
909 noaperture Don't touch the aperture for AGP.
910*/
911__init int iommu_setup(char *p)
912{
913 int arg;
914
915 while (*p) {
916 if (!strncmp(p,"noagp",5))
917 no_agp = 1;
918 if (!strncmp(p,"off",3))
919 no_iommu = 1;
920 if (!strncmp(p,"force",5)) {
921 force_iommu = 1;
922 iommu_aperture_allowed = 1;
923 }
924 if (!strncmp(p,"allowed",7))
925 iommu_aperture_allowed = 1;
926 if (!strncmp(p,"noforce",7)) {
927 iommu_merge = 0;
928 force_iommu = 0;
929 }
930 if (!strncmp(p, "memaper", 7)) {
931 fallback_aper_force = 1;
932 p += 7;
933 if (*p == '=') {
934 ++p;
935 if (get_option(&p, &arg))
936 fallback_aper_order = arg;
937 }
938 }
939 if (!strncmp(p, "biomerge",8)) {
940 iommu_bio_merge = 4096;
941 iommu_merge = 1;
942 force_iommu = 1;
943 }
944 if (!strncmp(p, "panic",5))
945 panic_on_overflow = 1;
946 if (!strncmp(p, "nopanic",7))
947 panic_on_overflow = 0;
948 if (!strncmp(p, "merge",5)) {
949 iommu_merge = 1;
950 force_iommu = 1;
951 }
952 if (!strncmp(p, "nomerge",7))
953 iommu_merge = 0;
954 if (!strncmp(p, "forcesac",8))
955 iommu_sac_force = 1;
956 if (!strncmp(p, "fullflush",8))
957 iommu_fullflush = 1;
958 if (!strncmp(p, "nofullflush",11))
959 iommu_fullflush = 0;
960 if (!strncmp(p, "soft",4))
961 swiotlb = 1;
962 if (!strncmp(p, "noaperture",10))
963 fix_aperture = 0;
964#ifdef CONFIG_IOMMU_LEAK
965 if (!strncmp(p,"leak",4)) {
966 leak_trace = 1;
967 p += 4;
968 if (*p == '=') ++p;
969 if (isdigit(*p) && get_option(&p, &arg))
970 iommu_leak_pages = arg;
971 } else
972#endif
973 if (isdigit(*p) && get_option(&p, &arg))
974 iommu_size = arg;
975 p += strcspn(p, ",");
976 if (*p == ',')
977 ++p;
978 }
979 return 1;
980}