diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
commit | 250c22777fe1ccd7ac588579a6c16db4c0161cc5 (patch) | |
tree | 55c317efb7d792ec6fdae1d1937c67a502c48dec /arch/x86/kernel/pci-gart_64.c | |
parent | 2db55d344e529492545cb3b755c7e9ba8e4fa94e (diff) |
x86_64: move kernel
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/pci-gart_64.c')
-rw-r--r-- | arch/x86/kernel/pci-gart_64.c | 740 |
1 files changed, 740 insertions, 0 deletions
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c new file mode 100644 index 000000000000..4918c575d582 --- /dev/null +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -0,0 +1,740 @@ | |||
1 | /* | ||
2 | * Dynamic DMA mapping support for AMD Hammer. | ||
3 | * | ||
4 | * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. | ||
5 | * This allows PCI devices that only support 32bit addresses to be used on | ||
6 | * systems with more than 4GB. | ||
7 | * | ||
8 | * See Documentation/DMA-mapping.txt for the interface specification. | ||
9 | * | ||
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/ctype.h> | ||
15 | #include <linux/agp_backend.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/spinlock.h> | ||
20 | #include <linux/pci.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/topology.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/bitops.h> | ||
25 | #include <linux/kdebug.h> | ||
26 | #include <asm/atomic.h> | ||
27 | #include <asm/io.h> | ||
28 | #include <asm/mtrr.h> | ||
29 | #include <asm/pgtable.h> | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/iommu.h> | ||
32 | #include <asm/cacheflush.h> | ||
33 | #include <asm/swiotlb.h> | ||
34 | #include <asm/dma.h> | ||
35 | #include <asm/k8.h> | ||
36 | |||
37 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ | ||
38 | static unsigned long iommu_size; /* size of remapping area bytes */ | ||
39 | static unsigned long iommu_pages; /* .. and in pages */ | ||
40 | |||
41 | u32 *iommu_gatt_base; /* Remapping table */ | ||
42 | |||
43 | /* If this is disabled the IOMMU will use an optimized flushing strategy | ||
44 | of only flushing when a mapping is reused. With it true the GART is flushed | ||
45 | for every mapping. Problem is that doing the lazy flush seems to trigger | ||
46 | bugs with some popular PCI cards, in particular 3ware (but has also been | ||
47 | seen with Qlogic at least). */ | ||
48 | int iommu_fullflush = 1; | ||
49 | |||
50 | /* Allocation bitmap for the remapping area */ | ||
51 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | ||
52 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ | ||
53 | |||
54 | static u32 gart_unmapped_entry; | ||
55 | |||
/* Flag bits OR-ed into every encoded GART page table entry */
#define GPTE_VALID	1
#define GPTE_COHERENT	2

/*
 * Pack an address into a GART PTE: bits 12-31 stay in place, everything
 * above bit 31 is shifted down so that it starts at bit 4, and both flag
 * bits are set.
 */
#define GPTE_ENCODE(x) \
	(GPTE_VALID | GPTE_COHERENT | (((x) >> 32) << 4) | ((x) & 0xfffff000))

/* Inverse of GPTE_ENCODE: PTE bits 4-11 move back up to address bits 32-39 */
#define GPTE_DECODE(x) ((((u64)(x) & 0xff0) << 28) | ((x) & 0xfffff000))

/* Number of pages touched by a buffer of @size bytes starting at @addr */
#define to_pages(addr, size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)

/* Pages reserved at the start of the remapping area (32 * 4KB = 128KB) */
#define EMERGENCY_PAGES 32
66 | |||
67 | #ifdef CONFIG_AGP | ||
68 | #define AGPEXTERN extern | ||
69 | #else | ||
70 | #define AGPEXTERN | ||
71 | #endif | ||
72 | |||
73 | /* backdoor interface to AGP driver */ | ||
74 | AGPEXTERN int agp_memory_reserved; | ||
75 | AGPEXTERN __u32 *agp_gatt_table; | ||
76 | |||
77 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | ||
78 | static int need_flush; /* global flush state. set for each gart wrap */ | ||
79 | |||
80 | static unsigned long alloc_iommu(int size) | ||
81 | { | ||
82 | unsigned long offset, flags; | ||
83 | |||
84 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
85 | offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); | ||
86 | if (offset == -1) { | ||
87 | need_flush = 1; | ||
88 | offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); | ||
89 | } | ||
90 | if (offset != -1) { | ||
91 | set_bit_string(iommu_gart_bitmap, offset, size); | ||
92 | next_bit = offset+size; | ||
93 | if (next_bit >= iommu_pages) { | ||
94 | next_bit = 0; | ||
95 | need_flush = 1; | ||
96 | } | ||
97 | } | ||
98 | if (iommu_fullflush) | ||
99 | need_flush = 1; | ||
100 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
101 | return offset; | ||
102 | } | ||
103 | |||
104 | static void free_iommu(unsigned long offset, int size) | ||
105 | { | ||
106 | unsigned long flags; | ||
107 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
108 | __clear_bit_string(iommu_gart_bitmap, offset, size); | ||
109 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Use global flush state to avoid races with multiple flushers. | ||
114 | */ | ||
115 | static void flush_gart(void) | ||
116 | { | ||
117 | unsigned long flags; | ||
118 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | ||
119 | if (need_flush) { | ||
120 | k8_flush_garts(); | ||
121 | need_flush = 0; | ||
122 | } | ||
123 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | ||
124 | } | ||
125 | |||
#ifdef CONFIG_IOMMU_LEAK

/*
 * Remember / forget which caller mapped remapping-table slot x.
 * Both are no-ops until iommu_leak_tab has been allocated. They are wrapped
 * in do { } while (0) so that each use expands to exactly one statement and
 * cannot capture a following "else".
 */
#define SET_LEAK(x) do { \
		if (iommu_leak_tab) \
			iommu_leak_tab[x] = __builtin_return_address(0); \
	} while (0)
#define CLEAR_LEAK(x) do { \
		if (iommu_leak_tab) \
			iommu_leak_tab[x] = NULL; \
	} while (0)

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;

/*
 * Print the recorded mapper of the last iommu_leak_pages slots of the
 * remapping area, two entries per line. Runs at most once; later calls
 * return immediately.
 */
void dump_leak(void)
{
	static int dump;
	unsigned long count, i;

	if (dump || !iommu_leak_tab)
		return;
	dump = 1;
	show_stack(NULL, NULL);

	/* Never walk off the front of the table, whatever the option said */
	count = iommu_leak_pages > 0 ? iommu_leak_pages : 0;
	if (count > iommu_pages)
		count = iommu_pages;

	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", (int)count);
	for (i = 1; i <= count; i++) {
		/* The last valid slot is iommu_pages - 1, not iommu_pages */
		unsigned long slot = iommu_pages - i;

		printk("%lu: ", slot);
		printk_address((unsigned long) iommu_leak_tab[slot]);
		printk("%c", i % 2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x) do { } while (0)
#define CLEAR_LEAK(x) do { } while (0)
#endif
157 | |||
158 | static void iommu_full(struct device *dev, size_t size, int dir) | ||
159 | { | ||
160 | /* | ||
161 | * Ran out of IOMMU space for this operation. This is very bad. | ||
162 | * Unfortunately the drivers cannot handle this operation properly. | ||
163 | * Return some non mapped prereserved space in the aperture and | ||
164 | * let the Northbridge deal with it. This will result in garbage | ||
165 | * in the IO operation. When the size exceeds the prereserved space | ||
166 | * memory corruption will occur or random memory will be DMAed | ||
167 | * out. Hopefully no network devices use single mappings that big. | ||
168 | */ | ||
169 | |||
170 | printk(KERN_ERR | ||
171 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | ||
172 | size, dev->bus_id); | ||
173 | |||
174 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { | ||
175 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | ||
176 | panic("PCI-DMA: Memory would be corrupted\n"); | ||
177 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) | ||
178 | panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); | ||
179 | } | ||
180 | |||
181 | #ifdef CONFIG_IOMMU_LEAK | ||
182 | dump_leak(); | ||
183 | #endif | ||
184 | } | ||
185 | |||
186 | static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) | ||
187 | { | ||
188 | u64 mask = *dev->dma_mask; | ||
189 | int high = addr + size > mask; | ||
190 | int mmu = high; | ||
191 | if (force_iommu) | ||
192 | mmu = 1; | ||
193 | return mmu; | ||
194 | } | ||
195 | |||
196 | static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | ||
197 | { | ||
198 | u64 mask = *dev->dma_mask; | ||
199 | int high = addr + size > mask; | ||
200 | int mmu = high; | ||
201 | return mmu; | ||
202 | } | ||
203 | |||
204 | /* Map a single continuous physical area into the IOMMU. | ||
205 | * Caller needs to check if the iommu is needed and flush. | ||
206 | */ | ||
207 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | ||
208 | size_t size, int dir) | ||
209 | { | ||
210 | unsigned long npages = to_pages(phys_mem, size); | ||
211 | unsigned long iommu_page = alloc_iommu(npages); | ||
212 | int i; | ||
213 | if (iommu_page == -1) { | ||
214 | if (!nonforced_iommu(dev, phys_mem, size)) | ||
215 | return phys_mem; | ||
216 | if (panic_on_overflow) | ||
217 | panic("dma_map_area overflow %lu bytes\n", size); | ||
218 | iommu_full(dev, size, dir); | ||
219 | return bad_dma_address; | ||
220 | } | ||
221 | |||
222 | for (i = 0; i < npages; i++) { | ||
223 | iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); | ||
224 | SET_LEAK(iommu_page + i); | ||
225 | phys_mem += PAGE_SIZE; | ||
226 | } | ||
227 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | ||
228 | } | ||
229 | |||
230 | static dma_addr_t gart_map_simple(struct device *dev, char *buf, | ||
231 | size_t size, int dir) | ||
232 | { | ||
233 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | ||
234 | flush_gart(); | ||
235 | return map; | ||
236 | } | ||
237 | |||
238 | /* Map a single area into the IOMMU */ | ||
239 | static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) | ||
240 | { | ||
241 | unsigned long phys_mem, bus; | ||
242 | |||
243 | if (!dev) | ||
244 | dev = &fallback_dev; | ||
245 | |||
246 | phys_mem = virt_to_phys(addr); | ||
247 | if (!need_iommu(dev, phys_mem, size)) | ||
248 | return phys_mem; | ||
249 | |||
250 | bus = gart_map_simple(dev, addr, size, dir); | ||
251 | return bus; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Free a DMA mapping. | ||
256 | */ | ||
257 | static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | ||
258 | size_t size, int direction) | ||
259 | { | ||
260 | unsigned long iommu_page; | ||
261 | int npages; | ||
262 | int i; | ||
263 | |||
264 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | ||
265 | dma_addr >= iommu_bus_base + iommu_size) | ||
266 | return; | ||
267 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | ||
268 | npages = to_pages(dma_addr, size); | ||
269 | for (i = 0; i < npages; i++) { | ||
270 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | ||
271 | CLEAR_LEAK(iommu_page + i); | ||
272 | } | ||
273 | free_iommu(iommu_page, npages); | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Wrapper for pci_unmap_single working with scatterlists. | ||
278 | */ | ||
279 | static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
280 | { | ||
281 | int i; | ||
282 | |||
283 | for (i = 0; i < nents; i++) { | ||
284 | struct scatterlist *s = &sg[i]; | ||
285 | if (!s->dma_length || !s->length) | ||
286 | break; | ||
287 | gart_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
288 | } | ||
289 | } | ||
290 | |||
291 | /* Fallback for dma_map_sg in case of overflow */ | ||
292 | static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | ||
293 | int nents, int dir) | ||
294 | { | ||
295 | int i; | ||
296 | |||
297 | #ifdef CONFIG_IOMMU_DEBUG | ||
298 | printk(KERN_DEBUG "dma_map_sg overflow\n"); | ||
299 | #endif | ||
300 | |||
301 | for (i = 0; i < nents; i++ ) { | ||
302 | struct scatterlist *s = &sg[i]; | ||
303 | unsigned long addr = page_to_phys(s->page) + s->offset; | ||
304 | if (nonforced_iommu(dev, addr, s->length)) { | ||
305 | addr = dma_map_area(dev, addr, s->length, dir); | ||
306 | if (addr == bad_dma_address) { | ||
307 | if (i > 0) | ||
308 | gart_unmap_sg(dev, sg, i, dir); | ||
309 | nents = 0; | ||
310 | sg[0].dma_length = 0; | ||
311 | break; | ||
312 | } | ||
313 | } | ||
314 | s->dma_address = addr; | ||
315 | s->dma_length = s->length; | ||
316 | } | ||
317 | flush_gart(); | ||
318 | return nents; | ||
319 | } | ||
320 | |||
321 | /* Map multiple scatterlist entries continuous into the first. */ | ||
322 | static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, | ||
323 | struct scatterlist *sout, unsigned long pages) | ||
324 | { | ||
325 | unsigned long iommu_start = alloc_iommu(pages); | ||
326 | unsigned long iommu_page = iommu_start; | ||
327 | int i; | ||
328 | |||
329 | if (iommu_start == -1) | ||
330 | return -1; | ||
331 | |||
332 | for (i = start; i < stopat; i++) { | ||
333 | struct scatterlist *s = &sg[i]; | ||
334 | unsigned long pages, addr; | ||
335 | unsigned long phys_addr = s->dma_address; | ||
336 | |||
337 | BUG_ON(i > start && s->offset); | ||
338 | if (i == start) { | ||
339 | *sout = *s; | ||
340 | sout->dma_address = iommu_bus_base; | ||
341 | sout->dma_address += iommu_page*PAGE_SIZE + s->offset; | ||
342 | sout->dma_length = s->length; | ||
343 | } else { | ||
344 | sout->dma_length += s->length; | ||
345 | } | ||
346 | |||
347 | addr = phys_addr; | ||
348 | pages = to_pages(s->offset, s->length); | ||
349 | while (pages--) { | ||
350 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | ||
351 | SET_LEAK(iommu_page); | ||
352 | addr += PAGE_SIZE; | ||
353 | iommu_page++; | ||
354 | } | ||
355 | } | ||
356 | BUG_ON(iommu_page - iommu_start != pages); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, | ||
361 | struct scatterlist *sout, | ||
362 | unsigned long pages, int need) | ||
363 | { | ||
364 | if (!need) { | ||
365 | BUG_ON(stopat - start != 1); | ||
366 | *sout = sg[start]; | ||
367 | sout->dma_length = sg[start].length; | ||
368 | return 0; | ||
369 | } | ||
370 | return __dma_map_cont(sg, start, stopat, sout, pages); | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * DMA map all entries in a scatterlist. | ||
375 | * Merge chunks that have page aligned sizes into a continuous mapping. | ||
376 | */ | ||
377 | int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
378 | { | ||
379 | int i; | ||
380 | int out; | ||
381 | int start; | ||
382 | unsigned long pages = 0; | ||
383 | int need = 0, nextneed; | ||
384 | |||
385 | if (nents == 0) | ||
386 | return 0; | ||
387 | |||
388 | if (!dev) | ||
389 | dev = &fallback_dev; | ||
390 | |||
391 | out = 0; | ||
392 | start = 0; | ||
393 | for (i = 0; i < nents; i++) { | ||
394 | struct scatterlist *s = &sg[i]; | ||
395 | dma_addr_t addr = page_to_phys(s->page) + s->offset; | ||
396 | s->dma_address = addr; | ||
397 | BUG_ON(s->length == 0); | ||
398 | |||
399 | nextneed = need_iommu(dev, addr, s->length); | ||
400 | |||
401 | /* Handle the previous not yet processed entries */ | ||
402 | if (i > start) { | ||
403 | struct scatterlist *ps = &sg[i-1]; | ||
404 | /* Can only merge when the last chunk ends on a page | ||
405 | boundary and the new one doesn't have an offset. */ | ||
406 | if (!iommu_merge || !nextneed || !need || s->offset || | ||
407 | (ps->offset + ps->length) % PAGE_SIZE) { | ||
408 | if (dma_map_cont(sg, start, i, sg+out, pages, | ||
409 | need) < 0) | ||
410 | goto error; | ||
411 | out++; | ||
412 | pages = 0; | ||
413 | start = i; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | need = nextneed; | ||
418 | pages += to_pages(s->offset, s->length); | ||
419 | } | ||
420 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) | ||
421 | goto error; | ||
422 | out++; | ||
423 | flush_gart(); | ||
424 | if (out < nents) | ||
425 | sg[out].dma_length = 0; | ||
426 | return out; | ||
427 | |||
428 | error: | ||
429 | flush_gart(); | ||
430 | gart_unmap_sg(dev, sg, nents, dir); | ||
431 | /* When it was forced or merged try again in a dumb way */ | ||
432 | if (force_iommu || iommu_merge) { | ||
433 | out = dma_map_sg_nonforce(dev, sg, nents, dir); | ||
434 | if (out > 0) | ||
435 | return out; | ||
436 | } | ||
437 | if (panic_on_overflow) | ||
438 | panic("dma_map_sg: overflow on %lu pages\n", pages); | ||
439 | iommu_full(dev, pages << PAGE_SHIFT, dir); | ||
440 | for (i = 0; i < nents; i++) | ||
441 | sg[i].dma_address = bad_dma_address; | ||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | static int no_agp; | ||
446 | |||
447 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | ||
448 | { | ||
449 | unsigned long a; | ||
450 | if (!iommu_size) { | ||
451 | iommu_size = aper_size; | ||
452 | if (!no_agp) | ||
453 | iommu_size /= 2; | ||
454 | } | ||
455 | |||
456 | a = aper + iommu_size; | ||
457 | iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; | ||
458 | |||
459 | if (iommu_size < 64*1024*1024) | ||
460 | printk(KERN_WARNING | ||
461 | "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); | ||
462 | |||
463 | return iommu_size; | ||
464 | } | ||
465 | |||
466 | static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) | ||
467 | { | ||
468 | unsigned aper_size = 0, aper_base_32; | ||
469 | u64 aper_base; | ||
470 | unsigned aper_order; | ||
471 | |||
472 | pci_read_config_dword(dev, 0x94, &aper_base_32); | ||
473 | pci_read_config_dword(dev, 0x90, &aper_order); | ||
474 | aper_order = (aper_order >> 1) & 7; | ||
475 | |||
476 | aper_base = aper_base_32 & 0x7fff; | ||
477 | aper_base <<= 25; | ||
478 | |||
479 | aper_size = (32 * 1024 * 1024) << aper_order; | ||
480 | if (aper_base + aper_size > 0x100000000UL || !aper_size) | ||
481 | aper_base = 0; | ||
482 | |||
483 | *size = aper_size; | ||
484 | return aper_base; | ||
485 | } | ||
486 | |||
487 | /* | ||
488 | * Private Northbridge GATT initialization in case we cannot use the | ||
489 | * AGP driver for some reason. | ||
490 | */ | ||
491 | static __init int init_k8_gatt(struct agp_kern_info *info) | ||
492 | { | ||
493 | struct pci_dev *dev; | ||
494 | void *gatt; | ||
495 | unsigned aper_base, new_aper_base; | ||
496 | unsigned aper_size, gatt_size, new_aper_size; | ||
497 | int i; | ||
498 | |||
499 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | ||
500 | aper_size = aper_base = info->aper_size = 0; | ||
501 | dev = NULL; | ||
502 | for (i = 0; i < num_k8_northbridges; i++) { | ||
503 | dev = k8_northbridges[i]; | ||
504 | new_aper_base = read_aperture(dev, &new_aper_size); | ||
505 | if (!new_aper_base) | ||
506 | goto nommu; | ||
507 | |||
508 | if (!aper_base) { | ||
509 | aper_size = new_aper_size; | ||
510 | aper_base = new_aper_base; | ||
511 | } | ||
512 | if (aper_size != new_aper_size || aper_base != new_aper_base) | ||
513 | goto nommu; | ||
514 | } | ||
515 | if (!aper_base) | ||
516 | goto nommu; | ||
517 | info->aper_base = aper_base; | ||
518 | info->aper_size = aper_size>>20; | ||
519 | |||
520 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | ||
521 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | ||
522 | if (!gatt) | ||
523 | panic("Cannot allocate GATT table"); | ||
524 | if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) | ||
525 | panic("Could not set GART PTEs to uncacheable pages"); | ||
526 | global_flush_tlb(); | ||
527 | |||
528 | memset(gatt, 0, gatt_size); | ||
529 | agp_gatt_table = gatt; | ||
530 | |||
531 | for (i = 0; i < num_k8_northbridges; i++) { | ||
532 | u32 ctl; | ||
533 | u32 gatt_reg; | ||
534 | |||
535 | dev = k8_northbridges[i]; | ||
536 | gatt_reg = __pa(gatt) >> 12; | ||
537 | gatt_reg <<= 4; | ||
538 | pci_write_config_dword(dev, 0x98, gatt_reg); | ||
539 | pci_read_config_dword(dev, 0x90, &ctl); | ||
540 | |||
541 | ctl |= 1; | ||
542 | ctl &= ~((1<<4) | (1<<5)); | ||
543 | |||
544 | pci_write_config_dword(dev, 0x90, ctl); | ||
545 | } | ||
546 | flush_gart(); | ||
547 | |||
548 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); | ||
549 | return 0; | ||
550 | |||
551 | nommu: | ||
552 | /* Should not happen anymore */ | ||
553 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | ||
554 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); | ||
555 | return -1; | ||
556 | } | ||
557 | |||
558 | extern int agp_amd64_init(void); | ||
559 | |||
560 | static const struct dma_mapping_ops gart_dma_ops = { | ||
561 | .mapping_error = NULL, | ||
562 | .map_single = gart_map_single, | ||
563 | .map_simple = gart_map_simple, | ||
564 | .unmap_single = gart_unmap_single, | ||
565 | .sync_single_for_cpu = NULL, | ||
566 | .sync_single_for_device = NULL, | ||
567 | .sync_single_range_for_cpu = NULL, | ||
568 | .sync_single_range_for_device = NULL, | ||
569 | .sync_sg_for_cpu = NULL, | ||
570 | .sync_sg_for_device = NULL, | ||
571 | .map_sg = gart_map_sg, | ||
572 | .unmap_sg = gart_unmap_sg, | ||
573 | }; | ||
574 | |||
575 | void gart_iommu_shutdown(void) | ||
576 | { | ||
577 | struct pci_dev *dev; | ||
578 | int i; | ||
579 | |||
580 | if (no_agp && (dma_ops != &gart_dma_ops)) | ||
581 | return; | ||
582 | |||
583 | for (i = 0; i < num_k8_northbridges; i++) { | ||
584 | u32 ctl; | ||
585 | |||
586 | dev = k8_northbridges[i]; | ||
587 | pci_read_config_dword(dev, 0x90, &ctl); | ||
588 | |||
589 | ctl &= ~1; | ||
590 | |||
591 | pci_write_config_dword(dev, 0x90, ctl); | ||
592 | } | ||
593 | } | ||
594 | |||
595 | void __init gart_iommu_init(void) | ||
596 | { | ||
597 | struct agp_kern_info info; | ||
598 | unsigned long aper_size; | ||
599 | unsigned long iommu_start; | ||
600 | unsigned long scratch; | ||
601 | long i; | ||
602 | |||
603 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { | ||
604 | printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); | ||
605 | return; | ||
606 | } | ||
607 | |||
608 | #ifndef CONFIG_AGP_AMD64 | ||
609 | no_agp = 1; | ||
610 | #else | ||
611 | /* Makefile puts PCI initialization via subsys_initcall first. */ | ||
612 | /* Add other K8 AGP bridge drivers here */ | ||
613 | no_agp = no_agp || | ||
614 | (agp_amd64_init() < 0) || | ||
615 | (agp_copy_info(agp_bridge, &info) < 0); | ||
616 | #endif | ||
617 | |||
618 | if (swiotlb) | ||
619 | return; | ||
620 | |||
621 | /* Did we detect a different HW IOMMU? */ | ||
622 | if (iommu_detected && !iommu_aperture) | ||
623 | return; | ||
624 | |||
625 | if (no_iommu || | ||
626 | (!force_iommu && end_pfn <= MAX_DMA32_PFN) || | ||
627 | !iommu_aperture || | ||
628 | (no_agp && init_k8_gatt(&info) < 0)) { | ||
629 | if (end_pfn > MAX_DMA32_PFN) { | ||
630 | printk(KERN_ERR "WARNING more than 4GB of memory " | ||
631 | "but GART IOMMU not available.\n" | ||
632 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | ||
633 | } | ||
634 | return; | ||
635 | } | ||
636 | |||
637 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | ||
638 | aper_size = info.aper_size * 1024 * 1024; | ||
639 | iommu_size = check_iommu_size(info.aper_base, aper_size); | ||
640 | iommu_pages = iommu_size >> PAGE_SHIFT; | ||
641 | |||
642 | iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, | ||
643 | get_order(iommu_pages/8)); | ||
644 | if (!iommu_gart_bitmap) | ||
645 | panic("Cannot allocate iommu bitmap\n"); | ||
646 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | ||
647 | |||
648 | #ifdef CONFIG_IOMMU_LEAK | ||
649 | if (leak_trace) { | ||
650 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | ||
651 | get_order(iommu_pages*sizeof(void *))); | ||
652 | if (iommu_leak_tab) | ||
653 | memset(iommu_leak_tab, 0, iommu_pages * 8); | ||
654 | else | ||
655 | printk("PCI-DMA: Cannot allocate leak trace area\n"); | ||
656 | } | ||
657 | #endif | ||
658 | |||
659 | /* | ||
660 | * Out of IOMMU space handling. | ||
661 | * Reserve some invalid pages at the beginning of the GART. | ||
662 | */ | ||
663 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | ||
664 | |||
665 | agp_memory_reserved = iommu_size; | ||
666 | printk(KERN_INFO | ||
667 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", | ||
668 | iommu_size>>20); | ||
669 | |||
670 | iommu_start = aper_size - iommu_size; | ||
671 | iommu_bus_base = info.aper_base + iommu_start; | ||
672 | bad_dma_address = iommu_bus_base; | ||
673 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | ||
674 | |||
675 | /* | ||
676 | * Unmap the IOMMU part of the GART. The alias of the page is | ||
677 | * always mapped with cache enabled and there is no full cache | ||
678 | * coherency across the GART remapping. The unmapping avoids | ||
679 | * automatic prefetches from the CPU allocating cache lines in | ||
680 | * there. All CPU accesses are done via the direct mapping to | ||
681 | * the backing memory. The GART address is only used by PCI | ||
682 | * devices. | ||
683 | */ | ||
684 | clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); | ||
685 | |||
686 | /* | ||
687 | * Try to workaround a bug (thanks to BenH) | ||
688 | * Set unmapped entries to a scratch page instead of 0. | ||
689 | * Any prefetches that hit unmapped entries won't get an bus abort | ||
690 | * then. | ||
691 | */ | ||
692 | scratch = get_zeroed_page(GFP_KERNEL); | ||
693 | if (!scratch) | ||
694 | panic("Cannot allocate iommu scratch page"); | ||
695 | gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); | ||
696 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) | ||
697 | iommu_gatt_base[i] = gart_unmapped_entry; | ||
698 | |||
699 | flush_gart(); | ||
700 | dma_ops = &gart_dma_ops; | ||
701 | } | ||
702 | |||
703 | void __init gart_parse_options(char *p) | ||
704 | { | ||
705 | int arg; | ||
706 | |||
707 | #ifdef CONFIG_IOMMU_LEAK | ||
708 | if (!strncmp(p,"leak",4)) { | ||
709 | leak_trace = 1; | ||
710 | p += 4; | ||
711 | if (*p == '=') ++p; | ||
712 | if (isdigit(*p) && get_option(&p, &arg)) | ||
713 | iommu_leak_pages = arg; | ||
714 | } | ||
715 | #endif | ||
716 | if (isdigit(*p) && get_option(&p, &arg)) | ||
717 | iommu_size = arg; | ||
718 | if (!strncmp(p, "fullflush",8)) | ||
719 | iommu_fullflush = 1; | ||
720 | if (!strncmp(p, "nofullflush",11)) | ||
721 | iommu_fullflush = 0; | ||
722 | if (!strncmp(p,"noagp",5)) | ||
723 | no_agp = 1; | ||
724 | if (!strncmp(p, "noaperture",10)) | ||
725 | fix_aperture = 0; | ||
726 | /* duplicated from pci-dma.c */ | ||
727 | if (!strncmp(p,"force",5)) | ||
728 | iommu_aperture_allowed = 1; | ||
729 | if (!strncmp(p,"allowed",7)) | ||
730 | iommu_aperture_allowed = 1; | ||
731 | if (!strncmp(p, "memaper", 7)) { | ||
732 | fallback_aper_force = 1; | ||
733 | p += 7; | ||
734 | if (*p == '=') { | ||
735 | ++p; | ||
736 | if (get_option(&p, &arg)) | ||
737 | fallback_aper_order = arg; | ||
738 | } | ||
739 | } | ||
740 | } | ||