diff options
author | Andi Kleen <ak@suse.de> | 2006-06-26 07:56:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 13:48:15 -0400 |
commit | a32073bffc656ca4bde6002b6cf7c1a8e0e22712 (patch) | |
tree | 5ddcd3107eca8807685a19490c2c849d728a51a6 | |
parent | 7c2d9cd218916276e52a5dae827b84a159fe5c96 (diff) |
[PATCH] x86_64: Clean up and enhance K8 northbridge access code
- Factor out the duplicated access/cache code into a single file
* Shared between i386/x86-64.
- Share flush code between AGP and IOMMU
* Fix a bug: AGP didn't wait for the end of the flush before returning
- Drop 8 northbridges limit and allocate dynamically
- Add lock to serialize AGP and IOMMU GART flushes
- Add PCI ID for next AMD northbridge
- Random related cleanups
The old K8 NUMA discovery code is unchanged. New systems
should all use SRAT for this.
Cc: "Navin Boppuri" <navin.boppuri@newisys.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/i386/Kconfig | 4 | ||||
-rw-r--r-- | arch/i386/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86_64/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/aperture.c | 24 | ||||
-rw-r--r-- | arch/x86_64/kernel/k8.c | 118 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-gart.c | 93 | ||||
-rw-r--r-- | arch/x86_64/pci/k8-bus.c | 10 | ||||
-rw-r--r-- | drivers/char/agp/amd64-agp.c | 77 | ||||
-rw-r--r-- | include/asm-i386/k8.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/k8.h | 14 |
11 files changed, 209 insertions, 141 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1596101cfaf8..2206bf6637de 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -1054,6 +1054,10 @@ config SCx200 | |||
1054 | This support is also available as a module. If compiled as a | 1054 | This support is also available as a module. If compiled as a |
1055 | module, it will be called scx200. | 1055 | module, it will be called scx200. |
1056 | 1056 | ||
1057 | config K8_NB | ||
1058 | def_bool y | ||
1059 | depends on AGP_AMD64 | ||
1060 | |||
1057 | source "drivers/pcmcia/Kconfig" | 1061 | source "drivers/pcmcia/Kconfig" |
1058 | 1062 | ||
1059 | source "drivers/pci/hotplug/Kconfig" | 1063 | source "drivers/pci/hotplug/Kconfig" |
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 96fb8a020af2..28b14d6c7b1a 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o | |||
37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o | 37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o |
38 | obj-$(CONFIG_VM86) += vm86.o | 38 | obj-$(CONFIG_VM86) += vm86.o |
39 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 39 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
40 | obj-$(CONFIG_K8_NB) += k8.o | ||
40 | 41 | ||
41 | EXTRA_AFLAGS := -traditional | 42 | EXTRA_AFLAGS := -traditional |
42 | 43 | ||
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r | |||
76 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ | 77 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ |
77 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE | 78 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE |
78 | $(call if_changed,syscall) | 79 | $(call if_changed,syscall) |
80 | |||
81 | k8-y += ../../x86_64/kernel/k8.o | ||
82 | |||
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index af44130f0d65..fc75275d8c72 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -501,6 +501,10 @@ config REORDER | |||
501 | optimal TLB usage. If you have pretty much any version of binutils, | 501 | optimal TLB usage. If you have pretty much any version of binutils, |
502 | this can increase your kernel build time by roughly one minute. | 502 | this can increase your kernel build time by roughly one minute. |
503 | 503 | ||
504 | config K8_NB | ||
505 | def_bool y | ||
506 | depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA) | ||
507 | |||
504 | endmenu | 508 | endmenu |
505 | 509 | ||
506 | # | 510 | # |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 381bc6ad743e..f927d11065fe 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -33,6 +33,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | |||
33 | obj-$(CONFIG_KPROBES) += kprobes.o | 33 | obj-$(CONFIG_KPROBES) += kprobes.o |
34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | 34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o |
35 | obj-$(CONFIG_X86_VSMP) += vsmp.o | 35 | obj-$(CONFIG_X86_VSMP) += vsmp.o |
36 | obj-$(CONFIG_K8_NB) += k8.o | ||
36 | 37 | ||
37 | obj-$(CONFIG_MODULES) += module.o | 38 | obj-$(CONFIG_MODULES) += module.o |
38 | 39 | ||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 70b9d21ed675..a7ad03ee98cf 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/proto.h> | 24 | #include <asm/proto.h> |
25 | #include <asm/pci-direct.h> | 25 | #include <asm/pci-direct.h> |
26 | #include <asm/dma.h> | 26 | #include <asm/dma.h> |
27 | #include <asm/k8.h> | ||
27 | 28 | ||
28 | int iommu_aperture; | 29 | int iommu_aperture; |
29 | int iommu_aperture_disabled __initdata = 0; | 30 | int iommu_aperture_disabled __initdata = 0; |
@@ -37,8 +38,6 @@ int fix_aperture __initdata = 1; | |||
37 | /* This code runs before the PCI subsystem is initialized, so just | 38 | /* This code runs before the PCI subsystem is initialized, so just |
38 | access the northbridge directly. */ | 39 | access the northbridge directly. */ |
39 | 40 | ||
40 | #define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16)) | ||
41 | |||
42 | static u32 __init allocate_aperture(void) | 41 | static u32 __init allocate_aperture(void) |
43 | { | 42 | { |
44 | pg_data_t *nd0 = NODE_DATA(0); | 43 | pg_data_t *nd0 = NODE_DATA(0); |
@@ -68,20 +67,20 @@ static u32 __init allocate_aperture(void) | |||
68 | return (u32)__pa(p); | 67 | return (u32)__pa(p); |
69 | } | 68 | } |
70 | 69 | ||
71 | static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) | 70 | static int __init aperture_valid(u64 aper_base, u32 aper_size) |
72 | { | 71 | { |
73 | if (!aper_base) | 72 | if (!aper_base) |
74 | return 0; | 73 | return 0; |
75 | if (aper_size < 64*1024*1024) { | 74 | if (aper_size < 64*1024*1024) { |
76 | printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); | 75 | printk("Aperture too small (%d MB)\n", aper_size>>20); |
77 | return 0; | 76 | return 0; |
78 | } | 77 | } |
79 | if (aper_base + aper_size >= 0xffffffff) { | 78 | if (aper_base + aper_size >= 0xffffffff) { |
80 | printk("Aperture from %s beyond 4GB. Ignoring.\n",name); | 79 | printk("Aperture beyond 4GB. Ignoring.\n"); |
81 | return 0; | 80 | return 0; |
82 | } | 81 | } |
83 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { | 82 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { |
84 | printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); | 83 | printk("Aperture pointing to e820 RAM. Ignoring.\n"); |
85 | return 0; | 84 | return 0; |
86 | } | 85 | } |
87 | return 1; | 86 | return 1; |
@@ -140,7 +139,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) | |||
140 | printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", | 139 | printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", |
141 | aper, 32 << *order, apsizereg); | 140 | aper, 32 << *order, apsizereg); |
142 | 141 | ||
143 | if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) | 142 | if (!aperture_valid(aper, (32*1024*1024) << *order)) |
144 | return 0; | 143 | return 0; |
145 | return (u32)aper; | 144 | return (u32)aper; |
146 | } | 145 | } |
@@ -208,9 +207,8 @@ void __init iommu_hole_init(void) | |||
208 | 207 | ||
209 | fix = 0; | 208 | fix = 0; |
210 | for (num = 24; num < 32; num++) { | 209 | for (num = 24; num < 32; num++) { |
211 | char name[30]; | 210 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
212 | if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) | 211 | continue; |
213 | continue; | ||
214 | 212 | ||
215 | iommu_aperture = 1; | 213 | iommu_aperture = 1; |
216 | 214 | ||
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void) | |||
222 | printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, | 220 | printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, |
223 | aper_base, aper_size>>20); | 221 | aper_base, aper_size>>20); |
224 | 222 | ||
225 | sprintf(name, "northbridge cpu %d", num-24); | 223 | if (!aperture_valid(aper_base, aper_size)) { |
226 | |||
227 | if (!aperture_valid(name, aper_base, aper_size)) { | ||
228 | fix = 1; | 224 | fix = 1; |
229 | break; | 225 | break; |
230 | } | 226 | } |
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void) | |||
273 | 269 | ||
274 | /* Fix up the north bridges */ | 270 | /* Fix up the north bridges */ |
275 | for (num = 24; num < 32; num++) { | 271 | for (num = 24; num < 32; num++) { |
276 | if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) | 272 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
277 | continue; | 273 | continue; |
278 | 274 | ||
279 | /* Don't enable translation yet. That is done later. | 275 | /* Don't enable translation yet. That is done later. |
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c new file mode 100644 index 000000000000..6416682d33d0 --- /dev/null +++ b/arch/x86_64/kernel/k8.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * Shared support code for AMD K8 northbridges and derivatives. | ||
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | ||
4 | */ | ||
5 | #include <linux/gfp.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/k8.h> | ||
12 | |||
13 | int num_k8_northbridges; | ||
14 | EXPORT_SYMBOL(num_k8_northbridges); | ||
15 | |||
16 | static u32 *flush_words; | ||
17 | |||
18 | struct pci_device_id k8_nb_ids[] = { | ||
19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | ||
20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | ||
21 | {} | ||
22 | }; | ||
23 | EXPORT_SYMBOL(k8_nb_ids); | ||
24 | |||
25 | struct pci_dev **k8_northbridges; | ||
26 | EXPORT_SYMBOL(k8_northbridges); | ||
27 | |||
28 | static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) | ||
29 | { | ||
30 | do { | ||
31 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | ||
32 | if (!dev) | ||
33 | break; | ||
34 | } while (!pci_match_id(&k8_nb_ids[0], dev)); | ||
35 | return dev; | ||
36 | } | ||
37 | |||
38 | int cache_k8_northbridges(void) | ||
39 | { | ||
40 | int i; | ||
41 | struct pci_dev *dev; | ||
42 | if (num_k8_northbridges) | ||
43 | return 0; | ||
44 | |||
45 | num_k8_northbridges = 0; | ||
46 | dev = NULL; | ||
47 | while ((dev = next_k8_northbridge(dev)) != NULL) | ||
48 | num_k8_northbridges++; | ||
49 | |||
50 | k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), | ||
51 | GFP_KERNEL); | ||
52 | if (!k8_northbridges) | ||
53 | return -ENOMEM; | ||
54 | |||
55 | flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); | ||
56 | if (!flush_words) { | ||
57 | kfree(k8_northbridges); | ||
58 | return -ENOMEM; | ||
59 | } | ||
60 | |||
61 | dev = NULL; | ||
62 | i = 0; | ||
63 | while ((dev = next_k8_northbridge(dev)) != NULL) { | ||
64 | k8_northbridges[i++] = dev; | ||
65 | pci_read_config_dword(dev, 0x9c, &flush_words[i]); | ||
66 | } | ||
67 | k8_northbridges[i] = NULL; | ||
68 | return 0; | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(cache_k8_northbridges); | ||
71 | |||
72 | /* Ignores subdevice/subvendor but as far as I can figure out | ||
73 | they're useless anyways */ | ||
74 | int __init early_is_k8_nb(u32 device) | ||
75 | { | ||
76 | struct pci_device_id *id; | ||
77 | u32 vendor = device & 0xffff; | ||
78 | device >>= 16; | ||
79 | for (id = k8_nb_ids; id->vendor; id++) | ||
80 | if (vendor == id->vendor && device == id->device) | ||
81 | return 1; | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | void k8_flush_garts(void) | ||
86 | { | ||
87 | int flushed, i; | ||
88 | unsigned long flags; | ||
89 | static DEFINE_SPINLOCK(gart_lock); | ||
90 | |||
91 | /* Avoid races between AGP and IOMMU. In theory it's not needed | ||
92 | but I'm not sure if the hardware won't lose flush requests | ||
93 | when another is pending. This whole thing is so expensive anyways | ||
94 | that it doesn't matter to serialize more. -AK */ | ||
95 | spin_lock_irqsave(&gart_lock, flags); | ||
96 | flushed = 0; | ||
97 | for (i = 0; i < num_k8_northbridges; i++) { | ||
98 | pci_write_config_dword(k8_northbridges[i], 0x9c, | ||
99 | flush_words[i]|1); | ||
100 | flushed++; | ||
101 | } | ||
102 | for (i = 0; i < num_k8_northbridges; i++) { | ||
103 | u32 w; | ||
104 | /* Make sure the hardware actually executed the flush*/ | ||
105 | for (;;) { | ||
106 | pci_read_config_dword(k8_northbridges[i], | ||
107 | 0x9c, &w); | ||
108 | if (!(w & 1)) | ||
109 | break; | ||
110 | cpu_relax(); | ||
111 | } | ||
112 | } | ||
113 | spin_unlock_irqrestore(&gart_lock, flags); | ||
114 | if (!flushed) | ||
115 | printk("nothing to flush?\n"); | ||
116 | } | ||
117 | EXPORT_SYMBOL_GPL(k8_flush_garts); | ||
118 | |||
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index ea8f4041794e..ded3af3bceec 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/kdebug.h> | 32 | #include <asm/kdebug.h> |
33 | #include <asm/swiotlb.h> | 33 | #include <asm/swiotlb.h> |
34 | #include <asm/dma.h> | 34 | #include <asm/dma.h> |
35 | #include <asm/k8.h> | ||
35 | 36 | ||
36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 37 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
37 | static unsigned long iommu_size; /* size of remapping area bytes */ | 38 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */ | |||
46 | also seen with Qlogic at least). */ | 47 | also seen with Qlogic at least). */ |
47 | int iommu_fullflush = 1; | 48 | int iommu_fullflush = 1; |
48 | 49 | ||
49 | #define MAX_NB 8 | ||
50 | |||
51 | /* Allocation bitmap for the remapping area */ | 50 | /* Allocation bitmap for the remapping area */ |
52 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | 51 | static DEFINE_SPINLOCK(iommu_bitmap_lock); |
53 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ | 52 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ |
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry; | |||
63 | #define to_pages(addr,size) \ | 62 | #define to_pages(addr,size) \ |
64 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | 63 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) |
65 | 64 | ||
66 | #define for_all_nb(dev) \ | ||
67 | dev = NULL; \ | ||
68 | while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) | ||
69 | |||
70 | static struct pci_dev *northbridges[MAX_NB]; | ||
71 | static u32 northbridge_flush_word[MAX_NB]; | ||
72 | |||
73 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 65 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
74 | 66 | ||
75 | #ifdef CONFIG_AGP | 67 | #ifdef CONFIG_AGP |
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size) | |||
120 | /* | 112 | /* |
121 | * Use global flush state to avoid races with multiple flushers. | 113 | * Use global flush state to avoid races with multiple flushers. |
122 | */ | 114 | */ |
123 | static void flush_gart(struct device *dev) | 115 | static void flush_gart(void) |
124 | { | 116 | { |
125 | unsigned long flags; | 117 | unsigned long flags; |
126 | int flushed = 0; | ||
127 | int i, max; | ||
128 | |||
129 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 118 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
130 | if (need_flush) { | 119 | if (need_flush) { |
131 | max = 0; | 120 | k8_flush_garts(); |
132 | for (i = 0; i < MAX_NB; i++) { | ||
133 | if (!northbridges[i]) | ||
134 | continue; | ||
135 | pci_write_config_dword(northbridges[i], 0x9c, | ||
136 | northbridge_flush_word[i] | 1); | ||
137 | flushed++; | ||
138 | max = i; | ||
139 | } | ||
140 | for (i = 0; i <= max; i++) { | ||
141 | u32 w; | ||
142 | if (!northbridges[i]) | ||
143 | continue; | ||
144 | /* Make sure the hardware actually executed the flush. */ | ||
145 | for (;;) { | ||
146 | pci_read_config_dword(northbridges[i], 0x9c, &w); | ||
147 | if (!(w & 1)) | ||
148 | break; | ||
149 | cpu_relax(); | ||
150 | } | ||
151 | } | ||
152 | if (!flushed) | ||
153 | printk("nothing to flush?\n"); | ||
154 | need_flush = 0; | 121 | need_flush = 0; |
155 | } | 122 | } |
156 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 123 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
157 | } | 124 | } |
158 | 125 | ||
159 | |||
160 | |||
161 | #ifdef CONFIG_IOMMU_LEAK | 126 | #ifdef CONFIG_IOMMU_LEAK |
162 | 127 | ||
163 | #define SET_LEAK(x) if (iommu_leak_tab) \ | 128 | #define SET_LEAK(x) if (iommu_leak_tab) \ |
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf, | |||
266 | size_t size, int dir) | 231 | size_t size, int dir) |
267 | { | 232 | { |
268 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | 233 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); |
269 | flush_gart(dev); | 234 | flush_gart(); |
270 | return map; | 235 | return map; |
271 | } | 236 | } |
272 | 237 | ||
@@ -351,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
351 | s->dma_address = addr; | 316 | s->dma_address = addr; |
352 | s->dma_length = s->length; | 317 | s->dma_length = s->length; |
353 | } | 318 | } |
354 | flush_gart(dev); | 319 | flush_gart(); |
355 | return nents; | 320 | return nents; |
356 | } | 321 | } |
357 | 322 | ||
@@ -458,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
458 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) | 423 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) |
459 | goto error; | 424 | goto error; |
460 | out++; | 425 | out++; |
461 | flush_gart(dev); | 426 | flush_gart(); |
462 | if (out < nents) | 427 | if (out < nents) |
463 | sg[out].dma_length = 0; | 428 | sg[out].dma_length = 0; |
464 | return out; | 429 | return out; |
465 | 430 | ||
466 | error: | 431 | error: |
467 | flush_gart(NULL); | 432 | flush_gart(); |
468 | gart_unmap_sg(dev, sg, nents, dir); | 433 | gart_unmap_sg(dev, sg, nents, dir); |
469 | /* When it was forced or merged try again in a dumb way */ | 434 | /* When it was forced or merged try again in a dumb way */ |
470 | if (force_iommu || iommu_merge) { | 435 | if (force_iommu || iommu_merge) { |
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
532 | void *gatt; | 497 | void *gatt; |
533 | unsigned aper_base, new_aper_base; | 498 | unsigned aper_base, new_aper_base; |
534 | unsigned aper_size, gatt_size, new_aper_size; | 499 | unsigned aper_size, gatt_size, new_aper_size; |
535 | 500 | int i; | |
501 | |||
536 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 502 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
537 | aper_size = aper_base = info->aper_size = 0; | 503 | aper_size = aper_base = info->aper_size = 0; |
538 | for_all_nb(dev) { | 504 | dev = NULL; |
505 | for (i = 0; i < num_k8_northbridges; i++) { | ||
506 | dev = k8_northbridges[i]; | ||
539 | new_aper_base = read_aperture(dev, &new_aper_size); | 507 | new_aper_base = read_aperture(dev, &new_aper_size); |
540 | if (!new_aper_base) | 508 | if (!new_aper_base) |
541 | goto nommu; | 509 | goto nommu; |
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
558 | panic("Cannot allocate GATT table"); | 526 | panic("Cannot allocate GATT table"); |
559 | memset(gatt, 0, gatt_size); | 527 | memset(gatt, 0, gatt_size); |
560 | agp_gatt_table = gatt; | 528 | agp_gatt_table = gatt; |
561 | 529 | ||
562 | for_all_nb(dev) { | 530 | for (i = 0; i < num_k8_northbridges; i++) { |
563 | u32 ctl; | 531 | u32 ctl; |
564 | u32 gatt_reg; | 532 | u32 gatt_reg; |
565 | 533 | ||
534 | dev = k8_northbridges[i]; | ||
566 | gatt_reg = __pa(gatt) >> 12; | 535 | gatt_reg = __pa(gatt) >> 12; |
567 | gatt_reg <<= 4; | 536 | gatt_reg <<= 4; |
568 | pci_write_config_dword(dev, 0x98, gatt_reg); | 537 | pci_write_config_dword(dev, 0x98, gatt_reg); |
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
573 | 542 | ||
574 | pci_write_config_dword(dev, 0x90, ctl); | 543 | pci_write_config_dword(dev, 0x90, ctl); |
575 | } | 544 | } |
576 | flush_gart(NULL); | 545 | flush_gart(); |
577 | 546 | ||
578 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); | 547 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); |
579 | return 0; | 548 | return 0; |
@@ -607,10 +576,14 @@ static int __init pci_iommu_init(void) | |||
607 | struct agp_kern_info info; | 576 | struct agp_kern_info info; |
608 | unsigned long aper_size; | 577 | unsigned long aper_size; |
609 | unsigned long iommu_start; | 578 | unsigned long iommu_start; |
610 | struct pci_dev *dev; | ||
611 | unsigned long scratch; | 579 | unsigned long scratch; |
612 | long i; | 580 | long i; |
613 | 581 | ||
582 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { | ||
583 | printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); | ||
584 | return -1; | ||
585 | } | ||
586 | |||
614 | #ifndef CONFIG_AGP_AMD64 | 587 | #ifndef CONFIG_AGP_AMD64 |
615 | no_agp = 1; | 588 | no_agp = 1; |
616 | #else | 589 | #else |
@@ -637,14 +610,6 @@ static int __init pci_iommu_init(void) | |||
637 | return -1; | 610 | return -1; |
638 | } | 611 | } |
639 | 612 | ||
640 | i = 0; | ||
641 | for_all_nb(dev) | ||
642 | i++; | ||
643 | if (i > MAX_NB) { | ||
644 | printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); | ||
645 | return -1; | ||
646 | } | ||
647 | |||
648 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 613 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
649 | aper_size = info.aper_size * 1024 * 1024; | 614 | aper_size = info.aper_size * 1024 * 1024; |
650 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 615 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
@@ -707,20 +672,8 @@ static int __init pci_iommu_init(void) | |||
707 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) | 672 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) |
708 | iommu_gatt_base[i] = gart_unmapped_entry; | 673 | iommu_gatt_base[i] = gart_unmapped_entry; |
709 | 674 | ||
710 | for_all_nb(dev) { | 675 | flush_gart(); |
711 | u32 flag; | ||
712 | int cpu = PCI_SLOT(dev->devfn) - 24; | ||
713 | if (cpu >= MAX_NB) | ||
714 | continue; | ||
715 | northbridges[cpu] = dev; | ||
716 | pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */ | ||
717 | northbridge_flush_word[cpu] = flag; | ||
718 | } | ||
719 | |||
720 | flush_gart(NULL); | ||
721 | |||
722 | dma_ops = &gart_dma_ops; | 676 | dma_ops = &gart_dma_ops; |
723 | |||
724 | return 0; | 677 | return 0; |
725 | } | 678 | } |
726 | 679 | ||
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 3acf60ded2a0..b50a7c7c47f8 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <asm/mpspec.h> | 3 | #include <asm/mpspec.h> |
4 | #include <linux/cpumask.h> | 4 | #include <linux/cpumask.h> |
5 | #include <asm/k8.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * This discovers the pcibus <-> node mapping on AMD K8. | 8 | * This discovers the pcibus <-> node mapping on AMD K8. |
@@ -18,7 +19,6 @@ | |||
18 | #define NR_LDT_BUS_NUMBER_REGISTERS 3 | 19 | #define NR_LDT_BUS_NUMBER_REGISTERS 3 |
19 | #define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) | 20 | #define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) |
20 | #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) | 21 | #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) |
21 | #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 | ||
22 | 22 | ||
23 | /** | 23 | /** |
24 | * fill_mp_bus_to_cpumask() | 24 | * fill_mp_bus_to_cpumask() |
@@ -28,8 +28,7 @@ | |||
28 | __init static int | 28 | __init static int |
29 | fill_mp_bus_to_cpumask(void) | 29 | fill_mp_bus_to_cpumask(void) |
30 | { | 30 | { |
31 | struct pci_dev *nb_dev = NULL; | 31 | int i, j, k; |
32 | int i, j; | ||
33 | u32 ldtbus, nid; | 32 | u32 ldtbus, nid; |
34 | static int lbnr[3] = { | 33 | static int lbnr[3] = { |
35 | LDT_BUS_NUMBER_REGISTER_0, | 34 | LDT_BUS_NUMBER_REGISTER_0, |
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void) | |||
37 | LDT_BUS_NUMBER_REGISTER_2 | 36 | LDT_BUS_NUMBER_REGISTER_2 |
38 | }; | 37 | }; |
39 | 38 | ||
40 | while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, | 39 | cache_k8_northbridges(); |
41 | PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { | 40 | for (k = 0; k < num_k8_northbridges; k++) { |
41 | struct pci_dev *nb_dev = k8_northbridges[k]; | ||
42 | pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); | 42 | pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); |
43 | 43 | ||
44 | for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { | 44 | for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { |
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index ac3c33a2e37d..229d015757f9 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c | |||
@@ -15,11 +15,9 @@ | |||
15 | #include <linux/agp_backend.h> | 15 | #include <linux/agp_backend.h> |
16 | #include <linux/mmzone.h> | 16 | #include <linux/mmzone.h> |
17 | #include <asm/page.h> /* PAGE_SIZE */ | 17 | #include <asm/page.h> /* PAGE_SIZE */ |
18 | #include <asm/k8.h> | ||
18 | #include "agp.h" | 19 | #include "agp.h" |
19 | 20 | ||
20 | /* Will need to be increased if AMD64 ever goes >8-way. */ | ||
21 | #define MAX_HAMMER_GARTS 8 | ||
22 | |||
23 | /* PTE bits. */ | 21 | /* PTE bits. */ |
24 | #define GPTE_VALID 1 | 22 | #define GPTE_VALID 1 |
25 | #define GPTE_COHERENT 2 | 23 | #define GPTE_COHERENT 2 |
@@ -53,28 +51,12 @@ | |||
53 | #define ULI_X86_64_HTT_FEA_REG 0x50 | 51 | #define ULI_X86_64_HTT_FEA_REG 0x50 |
54 | #define ULI_X86_64_ENU_SCR_REG 0x54 | 52 | #define ULI_X86_64_ENU_SCR_REG 0x54 |
55 | 53 | ||
56 | static int nr_garts; | ||
57 | static struct pci_dev * hammers[MAX_HAMMER_GARTS]; | ||
58 | |||
59 | static struct resource *aperture_resource; | 54 | static struct resource *aperture_resource; |
60 | static int __initdata agp_try_unsupported = 1; | 55 | static int __initdata agp_try_unsupported = 1; |
61 | 56 | ||
62 | #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++) | ||
63 | |||
64 | static void flush_amd64_tlb(struct pci_dev *dev) | ||
65 | { | ||
66 | u32 tmp; | ||
67 | |||
68 | pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp); | ||
69 | tmp |= INVGART; | ||
70 | pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp); | ||
71 | } | ||
72 | |||
73 | static void amd64_tlbflush(struct agp_memory *temp) | 57 | static void amd64_tlbflush(struct agp_memory *temp) |
74 | { | 58 | { |
75 | int gart_iterator; | 59 | k8_flush_garts(); |
76 | for_each_nb() | ||
77 | flush_amd64_tlb(hammers[gart_iterator]); | ||
78 | } | 60 | } |
79 | 61 | ||
80 | static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) | 62 | static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) |
@@ -153,7 +135,7 @@ static int amd64_fetch_size(void) | |||
153 | u32 temp; | 135 | u32 temp; |
154 | struct aper_size_info_32 *values; | 136 | struct aper_size_info_32 *values; |
155 | 137 | ||
156 | dev = hammers[0]; | 138 | dev = k8_northbridges[0]; |
157 | if (dev==NULL) | 139 | if (dev==NULL) |
158 | return 0; | 140 | return 0; |
159 | 141 | ||
@@ -201,9 +183,6 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) | |||
201 | tmp &= ~(DISGARTCPU | DISGARTIO); | 183 | tmp &= ~(DISGARTCPU | DISGARTIO); |
202 | pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); | 184 | pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); |
203 | 185 | ||
204 | /* keep CPU's coherent. */ | ||
205 | flush_amd64_tlb (hammer); | ||
206 | |||
207 | return aper_base; | 186 | return aper_base; |
208 | } | 187 | } |
209 | 188 | ||
@@ -222,13 +201,14 @@ static struct aper_size_info_32 amd_8151_sizes[7] = | |||
222 | static int amd_8151_configure(void) | 201 | static int amd_8151_configure(void) |
223 | { | 202 | { |
224 | unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); | 203 | unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); |
225 | int gart_iterator; | 204 | int i; |
226 | 205 | ||
227 | /* Configure AGP regs in each x86-64 host bridge. */ | 206 | /* Configure AGP regs in each x86-64 host bridge. */ |
228 | for_each_nb() { | 207 | for (i = 0; i < num_k8_northbridges; i++) { |
229 | agp_bridge->gart_bus_addr = | 208 | agp_bridge->gart_bus_addr = |
230 | amd64_configure(hammers[gart_iterator],gatt_bus); | 209 | amd64_configure(k8_northbridges[i], gatt_bus); |
231 | } | 210 | } |
211 | k8_flush_garts(); | ||
232 | return 0; | 212 | return 0; |
233 | } | 213 | } |
234 | 214 | ||
@@ -236,12 +216,13 @@ static int amd_8151_configure(void) | |||
236 | static void amd64_cleanup(void) | 216 | static void amd64_cleanup(void) |
237 | { | 217 | { |
238 | u32 tmp; | 218 | u32 tmp; |
239 | int gart_iterator; | 219 | int i; |
240 | for_each_nb() { | 220 | for (i = 0; i < num_k8_northbridges; i++) { |
221 | struct pci_dev *dev = k8_northbridges[i]; | ||
241 | /* disable gart translation */ | 222 | /* disable gart translation */ |
242 | pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); | 223 | pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp); |
243 | tmp &= ~AMD64_GARTEN; | 224 | tmp &= ~AMD64_GARTEN; |
244 | pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp); | 225 | pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp); |
245 | } | 226 | } |
246 | } | 227 | } |
247 | 228 | ||
@@ -361,17 +342,15 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, | |||
361 | 342 | ||
362 | static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) | 343 | static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) |
363 | { | 344 | { |
364 | struct pci_dev *loop_dev = NULL; | 345 | int i; |
365 | int i = 0; | 346 | |
366 | 347 | if (cache_k8_northbridges() < 0) | |
367 | /* cache pci_devs of northbridges. */ | 348 | return -ENODEV; |
368 | while ((loop_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev)) | 349 | |
369 | != NULL) { | 350 | i = 0; |
370 | if (i == MAX_HAMMER_GARTS) { | 351 | for (i = 0; i < num_k8_northbridges; i++) { |
371 | printk(KERN_ERR PFX "Too many northbridges for AGP\n"); | 352 | struct pci_dev *dev = k8_northbridges[i]; |
372 | return -1; | 353 | if (fix_northbridge(dev, pdev, cap_ptr) < 0) { |
373 | } | ||
374 | if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) { | ||
375 | printk(KERN_ERR PFX "No usable aperture found.\n"); | 354 | printk(KERN_ERR PFX "No usable aperture found.\n"); |
376 | #ifdef __x86_64__ | 355 | #ifdef __x86_64__ |
377 | /* should port this to i386 */ | 356 | /* should port this to i386 */ |
@@ -379,10 +358,8 @@ static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) | |||
379 | #endif | 358 | #endif |
380 | return -1; | 359 | return -1; |
381 | } | 360 | } |
382 | hammers[i++] = loop_dev; | ||
383 | } | 361 | } |
384 | nr_garts = i; | 362 | return 0; |
385 | return i == 0 ? -1 : 0; | ||
386 | } | 363 | } |
387 | 364 | ||
388 | /* Handle AMD 8151 quirks */ | 365 | /* Handle AMD 8151 quirks */ |
@@ -450,7 +427,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) | |||
450 | } | 427 | } |
451 | 428 | ||
452 | /* shadow x86-64 registers into ULi registers */ | 429 | /* shadow x86-64 registers into ULi registers */ |
453 | pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &httfea); | 430 | pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea); |
454 | 431 | ||
455 | /* if x86-64 aperture base is beyond 4G, exit here */ | 432 | /* if x86-64 aperture base is beyond 4G, exit here */ |
456 | if ((httfea & 0x7fff) >> (32 - 25)) | 433 | if ((httfea & 0x7fff) >> (32 - 25)) |
@@ -513,7 +490,7 @@ static int __devinit nforce3_agp_init(struct pci_dev *pdev) | |||
513 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); | 490 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); |
514 | 491 | ||
515 | /* shadow x86-64 registers into NVIDIA registers */ | 492 | /* shadow x86-64 registers into NVIDIA registers */ |
516 | pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase); | 493 | pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase); |
517 | 494 | ||
518 | /* if x86-64 aperture base is beyond 4G, exit here */ | 495 | /* if x86-64 aperture base is beyond 4G, exit here */ |
519 | if ( (apbase & 0x7fff) >> (32 - 25) ) { | 496 | if ( (apbase & 0x7fff) >> (32 - 25) ) { |
@@ -754,10 +731,6 @@ static struct pci_driver agp_amd64_pci_driver = { | |||
754 | int __init agp_amd64_init(void) | 731 | int __init agp_amd64_init(void) |
755 | { | 732 | { |
756 | int err = 0; | 733 | int err = 0; |
757 | static struct pci_device_id amd64nb[] = { | ||
758 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | ||
759 | { }, | ||
760 | }; | ||
761 | 734 | ||
762 | if (agp_off) | 735 | if (agp_off) |
763 | return -EINVAL; | 736 | return -EINVAL; |
@@ -774,7 +747,7 @@ int __init agp_amd64_init(void) | |||
774 | } | 747 | } |
775 | 748 | ||
776 | /* First check that we have at least one AMD64 NB */ | 749 | /* First check that we have at least one AMD64 NB */ |
777 | if (!pci_dev_present(amd64nb)) | 750 | if (!pci_dev_present(k8_nb_ids)) |
778 | return -ENODEV; | 751 | return -ENODEV; |
779 | 752 | ||
780 | /* Look for any AGP bridge */ | 753 | /* Look for any AGP bridge */ |
diff --git a/include/asm-i386/k8.h b/include/asm-i386/k8.h new file mode 100644 index 000000000000..dfd88a6e6040 --- /dev/null +++ b/include/asm-i386/k8.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-x86_64/k8.h> | |||
diff --git a/include/asm-x86_64/k8.h b/include/asm-x86_64/k8.h new file mode 100644 index 000000000000..699dd6961eda --- /dev/null +++ b/include/asm-x86_64/k8.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _ASM_K8_H | ||
2 | #define _ASM_K8_H 1 | ||
3 | |||
4 | #include <linux/pci.h> | ||
5 | |||
6 | extern struct pci_device_id k8_nb_ids[]; | ||
7 | |||
8 | extern int early_is_k8_nb(u32 value); | ||
9 | extern struct pci_dev **k8_northbridges; | ||
10 | extern int num_k8_northbridges; | ||
11 | extern int cache_k8_northbridges(void); | ||
12 | extern void k8_flush_garts(void); | ||
13 | |||
14 | #endif | ||