aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2006-06-26 07:56:40 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 13:48:15 -0400
commita32073bffc656ca4bde6002b6cf7c1a8e0e22712 (patch)
tree5ddcd3107eca8807685a19490c2c849d728a51a6
parent7c2d9cd218916276e52a5dae827b84a159fe5c96 (diff)
[PATCH] x86_64: Clean and enhance up K8 northbridge access code
- Factor out the duplicated access/cache code into a single file
  * Shared between i386/x86-64.
- Share flush code between AGP and IOMMU
  * Fix a bug: AGP didn't wait for end of flush before
- Drop 8 northbridges limit and allocate dynamically
- Add lock to serialize AGP and IOMMU GART flushes
- Add PCI ID for next AMD northbridge
- Random related cleanups

The old K8 NUMA discovery code is unchanged. New systems should all use SRAT for this.

Cc: "Navin Boppuri" <navin.boppuri@newisys.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/aperture.c24
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/pci-gart.c93
-rw-r--r--arch/x86_64/pci/k8-bus.c10
-rw-r--r--drivers/char/agp/amd64-agp.c77
-rw-r--r--include/asm-i386/k8.h1
-rw-r--r--include/asm-x86_64/k8.h14
11 files changed, 209 insertions, 141 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf8..2206bf6637de 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1054,6 +1054,10 @@ config SCx200
1054 This support is also available as a module. If compiled as a 1054 This support is also available as a module. If compiled as a
1055 module, it will be called scx200. 1055 module, it will be called scx200.
1056 1056
1057config K8_NB
1058 def_bool y
1059 depends on AGP_AMD64
1060
1057source "drivers/pcmcia/Kconfig" 1061source "drivers/pcmcia/Kconfig"
1058 1062
1059source "drivers/pci/hotplug/Kconfig" 1063source "drivers/pci/hotplug/Kconfig"
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af2..28b14d6c7b1a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
37obj-$(CONFIG_DOUBLEFAULT) += doublefault.o 37obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
38obj-$(CONFIG_VM86) += vm86.o 38obj-$(CONFIG_VM86) += vm86.o
39obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 39obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
40obj-$(CONFIG_K8_NB) += k8.o
40 41
41EXTRA_AFLAGS := -traditional 42EXTRA_AFLAGS := -traditional
42 43
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
76$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ 77$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
77 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE 78 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
78 $(call if_changed,syscall) 79 $(call if_changed,syscall)
80
81k8-y += ../../x86_64/kernel/k8.o
82
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index af44130f0d65..fc75275d8c72 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -501,6 +501,10 @@ config REORDER
501 optimal TLB usage. If you have pretty much any version of binutils, 501 optimal TLB usage. If you have pretty much any version of binutils,
502 this can increase your kernel build time by roughly one minute. 502 this can increase your kernel build time by roughly one minute.
503 503
504config K8_NB
505 def_bool y
506 depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA)
507
504endmenu 508endmenu
505 509
506# 510#
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 381bc6ad743e..f927d11065fe 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 33obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35obj-$(CONFIG_X86_VSMP) += vsmp.o 35obj-$(CONFIG_X86_VSMP) += vsmp.o
36obj-$(CONFIG_K8_NB) += k8.o
36 37
37obj-$(CONFIG_MODULES) += module.o 38obj-$(CONFIG_MODULES) += module.o
38 39
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a7ad03ee98cf 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -24,6 +24,7 @@
24#include <asm/proto.h> 24#include <asm/proto.h>
25#include <asm/pci-direct.h> 25#include <asm/pci-direct.h>
26#include <asm/dma.h> 26#include <asm/dma.h>
27#include <asm/k8.h>
27 28
28int iommu_aperture; 29int iommu_aperture;
29int iommu_aperture_disabled __initdata = 0; 30int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +38,6 @@ int fix_aperture __initdata = 1;
37/* This code runs before the PCI subsystem is initialized, so just 38/* This code runs before the PCI subsystem is initialized, so just
38 access the northbridge directly. */ 39 access the northbridge directly. */
39 40
40#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
41
42static u32 __init allocate_aperture(void) 41static u32 __init allocate_aperture(void)
43{ 42{
44 pg_data_t *nd0 = NODE_DATA(0); 43 pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +67,20 @@ static u32 __init allocate_aperture(void)
68 return (u32)__pa(p); 67 return (u32)__pa(p);
69} 68}
70 69
71static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 70static int __init aperture_valid(u64 aper_base, u32 aper_size)
72{ 71{
73 if (!aper_base) 72 if (!aper_base)
74 return 0; 73 return 0;
75 if (aper_size < 64*1024*1024) { 74 if (aper_size < 64*1024*1024) {
76 printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 75 printk("Aperture too small (%d MB)\n", aper_size>>20);
77 return 0; 76 return 0;
78 } 77 }
79 if (aper_base + aper_size >= 0xffffffff) { 78 if (aper_base + aper_size >= 0xffffffff) {
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 79 printk("Aperture beyond 4GB. Ignoring.\n");
81 return 0; 80 return 0;
82 } 81 }
83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 82 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 83 printk("Aperture pointing to e820 RAM. Ignoring.\n");
85 return 0; 84 return 0;
86 } 85 }
87 return 1; 86 return 1;
@@ -140,7 +139,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
140 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 139 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
141 aper, 32 << *order, apsizereg); 140 aper, 32 << *order, apsizereg);
142 141
143 if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) 142 if (!aperture_valid(aper, (32*1024*1024) << *order))
144 return 0; 143 return 0;
145 return (u32)aper; 144 return (u32)aper;
146} 145}
@@ -208,9 +207,8 @@ void __init iommu_hole_init(void)
208 207
209 fix = 0; 208 fix = 0;
210 for (num = 24; num < 32; num++) { 209 for (num = 24; num < 32; num++) {
211 char name[30]; 210 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
212 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 211 continue;
213 continue;
214 212
215 iommu_aperture = 1; 213 iommu_aperture = 1;
216 214
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
222 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 220 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
223 aper_base, aper_size>>20); 221 aper_base, aper_size>>20);
224 222
225 sprintf(name, "northbridge cpu %d", num-24); 223 if (!aperture_valid(aper_base, aper_size)) {
226
227 if (!aperture_valid(name, aper_base, aper_size)) {
228 fix = 1; 224 fix = 1;
229 break; 225 break;
230 } 226 }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
273 269
274 /* Fix up the north bridges */ 270 /* Fix up the north bridges */
275 for (num = 24; num < 32; num++) { 271 for (num = 24; num < 32; num++) {
276 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 272 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
277 continue; 273 continue;
278 274
279 /* Don't enable translation yet. That is done later. 275 /* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
1/*
2 * Shared support code for AMD K8 northbridges and derivatives.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */
5#include <linux/gfp.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <asm/k8.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15
16static u32 *flush_words;
17
18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
21 {}
22};
23EXPORT_SYMBOL(k8_nb_ids);
24
25struct pci_dev **k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges);
27
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
29{
30 do {
31 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
32 if (!dev)
33 break;
34 } while (!pci_match_id(&k8_nb_ids[0], dev));
35 return dev;
36}
37
38int cache_k8_northbridges(void)
39{
40 int i;
41 struct pci_dev *dev;
42 if (num_k8_northbridges)
43 return 0;
44
45 num_k8_northbridges = 0;
46 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++;
49
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
51 GFP_KERNEL);
52 if (!k8_northbridges)
53 return -ENOMEM;
54
55 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
56 if (!flush_words) {
57 kfree(k8_northbridges);
58 return -ENOMEM;
59 }
60
61 dev = NULL;
62 i = 0;
63 while ((dev = next_k8_northbridge(dev)) != NULL) {
64 k8_northbridges[i++] = dev;
65 pci_read_config_dword(dev, 0x9c, &flush_words[i]);
66 }
67 k8_northbridges[i] = NULL;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(cache_k8_northbridges);
71
72/* Ignores subdevice/subvendor but as far as I can figure out
73 they're useless anyways */
74int __init early_is_k8_nb(u32 device)
75{
76 struct pci_device_id *id;
77 u32 vendor = device & 0xffff;
78 device >>= 16;
79 for (id = k8_nb_ids; id->vendor; id++)
80 if (vendor == id->vendor && device == id->device)
81 return 1;
82 return 0;
83}
84
85void k8_flush_garts(void)
86{
87 int flushed, i;
88 unsigned long flags;
89 static DEFINE_SPINLOCK(gart_lock);
90
91 /* Avoid races between AGP and IOMMU. In theory it's not needed
92 but I'm not sure if the hardware won't lose flush requests
93 when another is pending. This whole thing is so expensive anyways
94 that it doesn't matter to serialize more. -AK */
95 spin_lock_irqsave(&gart_lock, flags);
96 flushed = 0;
97 for (i = 0; i < num_k8_northbridges; i++) {
98 pci_write_config_dword(k8_northbridges[i], 0x9c,
99 flush_words[i]|1);
100 flushed++;
101 }
102 for (i = 0; i < num_k8_northbridges; i++) {
103 u32 w;
104 /* Make sure the hardware actually executed the flush*/
105 for (;;) {
106 pci_read_config_dword(k8_northbridges[i],
107 0x9c, &w);
108 if (!(w & 1))
109 break;
110 cpu_relax();
111 }
112 }
113 spin_unlock_irqrestore(&gart_lock, flags);
114 if (!flushed)
115 printk("nothing to flush?\n");
116}
117EXPORT_SYMBOL_GPL(k8_flush_garts);
118
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index ea8f4041794e..ded3af3bceec 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33#include <asm/swiotlb.h> 33#include <asm/swiotlb.h>
34#include <asm/dma.h> 34#include <asm/dma.h>
35#include <asm/k8.h>
35 36
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 37unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 38static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
46 also seen with Qlogic at least). */ 47 also seen with Qlogic at least). */
47int iommu_fullflush = 1; 48int iommu_fullflush = 1;
48 49
49#define MAX_NB 8
50
51/* Allocation bitmap for the remapping area */ 50/* Allocation bitmap for the remapping area */
52static DEFINE_SPINLOCK(iommu_bitmap_lock); 51static DEFINE_SPINLOCK(iommu_bitmap_lock);
53static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ 52static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
63#define to_pages(addr,size) \ 62#define to_pages(addr,size) \
64 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 63 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
65 64
66#define for_all_nb(dev) \
67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69
70static struct pci_dev *northbridges[MAX_NB];
71static u32 northbridge_flush_word[MAX_NB];
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 65#define EMERGENCY_PAGES 32 /* = 128KB */
74 66
75#ifdef CONFIG_AGP 67#ifdef CONFIG_AGP
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
120/* 112/*
121 * Use global flush state to avoid races with multiple flushers. 113 * Use global flush state to avoid races with multiple flushers.
122 */ 114 */
123static void flush_gart(struct device *dev) 115static void flush_gart(void)
124{ 116{
125 unsigned long flags; 117 unsigned long flags;
126 int flushed = 0;
127 int i, max;
128
129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 118 spin_lock_irqsave(&iommu_bitmap_lock, flags);
130 if (need_flush) { 119 if (need_flush) {
131 max = 0; 120 k8_flush_garts();
132 for (i = 0; i < MAX_NB; i++) {
133 if (!northbridges[i])
134 continue;
135 pci_write_config_dword(northbridges[i], 0x9c,
136 northbridge_flush_word[i] | 1);
137 flushed++;
138 max = i;
139 }
140 for (i = 0; i <= max; i++) {
141 u32 w;
142 if (!northbridges[i])
143 continue;
144 /* Make sure the hardware actually executed the flush. */
145 for (;;) {
146 pci_read_config_dword(northbridges[i], 0x9c, &w);
147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
151 }
152 if (!flushed)
153 printk("nothing to flush?\n");
154 need_flush = 0; 121 need_flush = 0;
155 } 122 }
156 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
157} 124}
158 125
159
160
161#ifdef CONFIG_IOMMU_LEAK 126#ifdef CONFIG_IOMMU_LEAK
162 127
163#define SET_LEAK(x) if (iommu_leak_tab) \ 128#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
266 size_t size, int dir) 231 size_t size, int dir)
267{ 232{
268 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 233 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
269 flush_gart(dev); 234 flush_gart();
270 return map; 235 return map;
271} 236}
272 237
@@ -351,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
351 s->dma_address = addr; 316 s->dma_address = addr;
352 s->dma_length = s->length; 317 s->dma_length = s->length;
353 } 318 }
354 flush_gart(dev); 319 flush_gart();
355 return nents; 320 return nents;
356} 321}
357 322
@@ -458,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
458 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) 423 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
459 goto error; 424 goto error;
460 out++; 425 out++;
461 flush_gart(dev); 426 flush_gart();
462 if (out < nents) 427 if (out < nents)
463 sg[out].dma_length = 0; 428 sg[out].dma_length = 0;
464 return out; 429 return out;
465 430
466error: 431error:
467 flush_gart(NULL); 432 flush_gart();
468 gart_unmap_sg(dev, sg, nents, dir); 433 gart_unmap_sg(dev, sg, nents, dir);
469 /* When it was forced or merged try again in a dumb way */ 434 /* When it was forced or merged try again in a dumb way */
470 if (force_iommu || iommu_merge) { 435 if (force_iommu || iommu_merge) {
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
532 void *gatt; 497 void *gatt;
533 unsigned aper_base, new_aper_base; 498 unsigned aper_base, new_aper_base;
534 unsigned aper_size, gatt_size, new_aper_size; 499 unsigned aper_size, gatt_size, new_aper_size;
535 500 int i;
501
536 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 502 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
537 aper_size = aper_base = info->aper_size = 0; 503 aper_size = aper_base = info->aper_size = 0;
538 for_all_nb(dev) { 504 dev = NULL;
505 for (i = 0; i < num_k8_northbridges; i++) {
506 dev = k8_northbridges[i];
539 new_aper_base = read_aperture(dev, &new_aper_size); 507 new_aper_base = read_aperture(dev, &new_aper_size);
540 if (!new_aper_base) 508 if (!new_aper_base)
541 goto nommu; 509 goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 panic("Cannot allocate GATT table"); 526 panic("Cannot allocate GATT table");
559 memset(gatt, 0, gatt_size); 527 memset(gatt, 0, gatt_size);
560 agp_gatt_table = gatt; 528 agp_gatt_table = gatt;
561 529
562 for_all_nb(dev) { 530 for (i = 0; i < num_k8_northbridges; i++) {
563 u32 ctl; 531 u32 ctl;
564 u32 gatt_reg; 532 u32 gatt_reg;
565 533
534 dev = k8_northbridges[i];
566 gatt_reg = __pa(gatt) >> 12; 535 gatt_reg = __pa(gatt) >> 12;
567 gatt_reg <<= 4; 536 gatt_reg <<= 4;
568 pci_write_config_dword(dev, 0x98, gatt_reg); 537 pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
573 542
574 pci_write_config_dword(dev, 0x90, ctl); 543 pci_write_config_dword(dev, 0x90, ctl);
575 } 544 }
576 flush_gart(NULL); 545 flush_gart();
577 546
578 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 547 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
579 return 0; 548 return 0;
@@ -607,10 +576,14 @@ static int __init pci_iommu_init(void)
607 struct agp_kern_info info; 576 struct agp_kern_info info;
608 unsigned long aper_size; 577 unsigned long aper_size;
609 unsigned long iommu_start; 578 unsigned long iommu_start;
610 struct pci_dev *dev;
611 unsigned long scratch; 579 unsigned long scratch;
612 long i; 580 long i;
613 581
582 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
583 printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
584 return -1;
585 }
586
614#ifndef CONFIG_AGP_AMD64 587#ifndef CONFIG_AGP_AMD64
615 no_agp = 1; 588 no_agp = 1;
616#else 589#else
@@ -637,14 +610,6 @@ static int __init pci_iommu_init(void)
637 return -1; 610 return -1;
638 } 611 }
639 612
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 }
647
648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 613 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
649 aper_size = info.aper_size * 1024 * 1024; 614 aper_size = info.aper_size * 1024 * 1024;
650 iommu_size = check_iommu_size(info.aper_base, aper_size); 615 iommu_size = check_iommu_size(info.aper_base, aper_size);
@@ -707,20 +672,8 @@ static int __init pci_iommu_init(void)
707 for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 672 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
708 iommu_gatt_base[i] = gart_unmapped_entry; 673 iommu_gatt_base[i] = gart_unmapped_entry;
709 674
710 for_all_nb(dev) { 675 flush_gart();
711 u32 flag;
712 int cpu = PCI_SLOT(dev->devfn) - 24;
713 if (cpu >= MAX_NB)
714 continue;
715 northbridges[cpu] = dev;
716 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
717 northbridge_flush_word[cpu] = flag;
718 }
719
720 flush_gart(NULL);
721
722 dma_ops = &gart_dma_ops; 676 dma_ops = &gart_dma_ops;
723
724 return 0; 677 return 0;
725} 678}
726 679
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a0..b50a7c7c47f8 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -2,6 +2,7 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <asm/mpspec.h> 3#include <asm/mpspec.h>
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5#include <asm/k8.h>
5 6
6/* 7/*
7 * This discovers the pcibus <-> node mapping on AMD K8. 8 * This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
18#define NR_LDT_BUS_NUMBER_REGISTERS 3 19#define NR_LDT_BUS_NUMBER_REGISTERS 3
19#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) 20#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
20#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) 21#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
21#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
22 22
23/** 23/**
24 * fill_mp_bus_to_cpumask() 24 * fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
28__init static int 28__init static int
29fill_mp_bus_to_cpumask(void) 29fill_mp_bus_to_cpumask(void)
30{ 30{
31 struct pci_dev *nb_dev = NULL; 31 int i, j, k;
32 int i, j;
33 u32 ldtbus, nid; 32 u32 ldtbus, nid;
34 static int lbnr[3] = { 33 static int lbnr[3] = {
35 LDT_BUS_NUMBER_REGISTER_0, 34 LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
37 LDT_BUS_NUMBER_REGISTER_2 36 LDT_BUS_NUMBER_REGISTER_2
38 }; 37 };
39 38
40 while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 39 cache_k8_northbridges();
41 PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { 40 for (k = 0; k < num_k8_northbridges; k++) {
41 struct pci_dev *nb_dev = k8_northbridges[k];
42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); 42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
43 43
44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { 44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index ac3c33a2e37d..229d015757f9 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -15,11 +15,9 @@
15#include <linux/agp_backend.h> 15#include <linux/agp_backend.h>
16#include <linux/mmzone.h> 16#include <linux/mmzone.h>
17#include <asm/page.h> /* PAGE_SIZE */ 17#include <asm/page.h> /* PAGE_SIZE */
18#include <asm/k8.h>
18#include "agp.h" 19#include "agp.h"
19 20
20/* Will need to be increased if AMD64 ever goes >8-way. */
21#define MAX_HAMMER_GARTS 8
22
23/* PTE bits. */ 21/* PTE bits. */
24#define GPTE_VALID 1 22#define GPTE_VALID 1
25#define GPTE_COHERENT 2 23#define GPTE_COHERENT 2
@@ -53,28 +51,12 @@
53#define ULI_X86_64_HTT_FEA_REG 0x50 51#define ULI_X86_64_HTT_FEA_REG 0x50
54#define ULI_X86_64_ENU_SCR_REG 0x54 52#define ULI_X86_64_ENU_SCR_REG 0x54
55 53
56static int nr_garts;
57static struct pci_dev * hammers[MAX_HAMMER_GARTS];
58
59static struct resource *aperture_resource; 54static struct resource *aperture_resource;
60static int __initdata agp_try_unsupported = 1; 55static int __initdata agp_try_unsupported = 1;
61 56
62#define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
63
64static void flush_amd64_tlb(struct pci_dev *dev)
65{
66 u32 tmp;
67
68 pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp);
69 tmp |= INVGART;
70 pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp);
71}
72
73static void amd64_tlbflush(struct agp_memory *temp) 57static void amd64_tlbflush(struct agp_memory *temp)
74{ 58{
75 int gart_iterator; 59 k8_flush_garts();
76 for_each_nb()
77 flush_amd64_tlb(hammers[gart_iterator]);
78} 60}
79 61
80static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) 62static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -153,7 +135,7 @@ static int amd64_fetch_size(void)
153 u32 temp; 135 u32 temp;
154 struct aper_size_info_32 *values; 136 struct aper_size_info_32 *values;
155 137
156 dev = hammers[0]; 138 dev = k8_northbridges[0];
157 if (dev==NULL) 139 if (dev==NULL)
158 return 0; 140 return 0;
159 141
@@ -201,9 +183,6 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
201 tmp &= ~(DISGARTCPU | DISGARTIO); 183 tmp &= ~(DISGARTCPU | DISGARTIO);
202 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); 184 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
203 185
204 /* keep CPU's coherent. */
205 flush_amd64_tlb (hammer);
206
207 return aper_base; 186 return aper_base;
208} 187}
209 188
@@ -222,13 +201,14 @@ static struct aper_size_info_32 amd_8151_sizes[7] =
222static int amd_8151_configure(void) 201static int amd_8151_configure(void)
223{ 202{
224 unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); 203 unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
225 int gart_iterator; 204 int i;
226 205
227 /* Configure AGP regs in each x86-64 host bridge. */ 206 /* Configure AGP regs in each x86-64 host bridge. */
228 for_each_nb() { 207 for (i = 0; i < num_k8_northbridges; i++) {
229 agp_bridge->gart_bus_addr = 208 agp_bridge->gart_bus_addr =
230 amd64_configure(hammers[gart_iterator],gatt_bus); 209 amd64_configure(k8_northbridges[i], gatt_bus);
231 } 210 }
211 k8_flush_garts();
232 return 0; 212 return 0;
233} 213}
234 214
@@ -236,12 +216,13 @@ static int amd_8151_configure(void)
236static void amd64_cleanup(void) 216static void amd64_cleanup(void)
237{ 217{
238 u32 tmp; 218 u32 tmp;
239 int gart_iterator; 219 int i;
240 for_each_nb() { 220 for (i = 0; i < num_k8_northbridges; i++) {
221 struct pci_dev *dev = k8_northbridges[i];
241 /* disable gart translation */ 222 /* disable gart translation */
242 pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); 223 pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp);
243 tmp &= ~AMD64_GARTEN; 224 tmp &= ~AMD64_GARTEN;
244 pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp); 225 pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp);
245 } 226 }
246} 227}
247 228
@@ -361,17 +342,15 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
361 342
362static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) 343static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
363{ 344{
364 struct pci_dev *loop_dev = NULL; 345 int i;
365 int i = 0; 346
366 347 if (cache_k8_northbridges() < 0)
367 /* cache pci_devs of northbridges. */ 348 return -ENODEV;
368 while ((loop_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev)) 349
369 != NULL) { 350 i = 0;
370 if (i == MAX_HAMMER_GARTS) { 351 for (i = 0; i < num_k8_northbridges; i++) {
371 printk(KERN_ERR PFX "Too many northbridges for AGP\n"); 352 struct pci_dev *dev = k8_northbridges[i];
372 return -1; 353 if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
373 }
374 if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) {
375 printk(KERN_ERR PFX "No usable aperture found.\n"); 354 printk(KERN_ERR PFX "No usable aperture found.\n");
376#ifdef __x86_64__ 355#ifdef __x86_64__
377 /* should port this to i386 */ 356 /* should port this to i386 */
@@ -379,10 +358,8 @@ static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
379#endif 358#endif
380 return -1; 359 return -1;
381 } 360 }
382 hammers[i++] = loop_dev;
383 } 361 }
384 nr_garts = i; 362 return 0;
385 return i == 0 ? -1 : 0;
386} 363}
387 364
388/* Handle AMD 8151 quirks */ 365/* Handle AMD 8151 quirks */
@@ -450,7 +427,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
450 } 427 }
451 428
452 /* shadow x86-64 registers into ULi registers */ 429 /* shadow x86-64 registers into ULi registers */
453 pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &httfea); 430 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea);
454 431
455 /* if x86-64 aperture base is beyond 4G, exit here */ 432 /* if x86-64 aperture base is beyond 4G, exit here */
456 if ((httfea & 0x7fff) >> (32 - 25)) 433 if ((httfea & 0x7fff) >> (32 - 25))
@@ -513,7 +490,7 @@ static int __devinit nforce3_agp_init(struct pci_dev *pdev)
513 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); 490 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
514 491
515 /* shadow x86-64 registers into NVIDIA registers */ 492 /* shadow x86-64 registers into NVIDIA registers */
516 pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase); 493 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase);
517 494
518 /* if x86-64 aperture base is beyond 4G, exit here */ 495 /* if x86-64 aperture base is beyond 4G, exit here */
519 if ( (apbase & 0x7fff) >> (32 - 25) ) { 496 if ( (apbase & 0x7fff) >> (32 - 25) ) {
@@ -754,10 +731,6 @@ static struct pci_driver agp_amd64_pci_driver = {
754int __init agp_amd64_init(void) 731int __init agp_amd64_init(void)
755{ 732{
756 int err = 0; 733 int err = 0;
757 static struct pci_device_id amd64nb[] = {
758 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
759 { },
760 };
761 734
762 if (agp_off) 735 if (agp_off)
763 return -EINVAL; 736 return -EINVAL;
@@ -774,7 +747,7 @@ int __init agp_amd64_init(void)
774 } 747 }
775 748
776 /* First check that we have at least one AMD64 NB */ 749 /* First check that we have at least one AMD64 NB */
777 if (!pci_dev_present(amd64nb)) 750 if (!pci_dev_present(k8_nb_ids))
778 return -ENODEV; 751 return -ENODEV;
779 752
780 /* Look for any AGP bridge */ 753 /* Look for any AGP bridge */
diff --git a/include/asm-i386/k8.h b/include/asm-i386/k8.h
new file mode 100644
index 000000000000..dfd88a6e6040
--- /dev/null
+++ b/include/asm-i386/k8.h
@@ -0,0 +1 @@
#include <asm-x86_64/k8.h>
diff --git a/include/asm-x86_64/k8.h b/include/asm-x86_64/k8.h
new file mode 100644
index 000000000000..699dd6961eda
--- /dev/null
+++ b/include/asm-x86_64/k8.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_K8_H
2#define _ASM_K8_H 1
3
4#include <linux/pci.h>
5
6extern struct pci_device_id k8_nb_ids[];
7
8extern int early_is_k8_nb(u32 value);
9extern struct pci_dev **k8_northbridges;
10extern int num_k8_northbridges;
11extern int cache_k8_northbridges(void);
12extern void k8_flush_garts(void);
13
14#endif