aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2006-06-26 07:56:40 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 13:48:15 -0400
commita32073bffc656ca4bde6002b6cf7c1a8e0e22712 (patch)
tree5ddcd3107eca8807685a19490c2c849d728a51a6 /arch/x86_64/kernel
parent7c2d9cd218916276e52a5dae827b84a159fe5c96 (diff)
[PATCH] x86_64: Clean and enhance up K8 northbridge access code
- Factor out the duplicated access/cache code into a single file * Shared between i386/x86-64. - Share flush code between AGP and IOMMU * Fix a bug: AGP didn't wait for end of flush before - Drop 8 northbridges limit and allocate dynamically - Add lock to serialize AGP and IOMMU GART flushes - Add PCI ID for next AMD northbridge - Random related cleanups The old K8 NUMA discovery code is unchanged. New systems should all use SRAT for this. Cc: "Navin Boppuri" <navin.boppuri@newisys.com> Cc: Dave Jones <davej@redhat.com> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/aperture.c24
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/pci-gart.c93
4 files changed, 152 insertions, 84 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 381bc6ad743e..f927d11065fe 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 33obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35obj-$(CONFIG_X86_VSMP) += vsmp.o 35obj-$(CONFIG_X86_VSMP) += vsmp.o
36obj-$(CONFIG_K8_NB) += k8.o
36 37
37obj-$(CONFIG_MODULES) += module.o 38obj-$(CONFIG_MODULES) += module.o
38 39
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a7ad03ee98cf 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -24,6 +24,7 @@
24#include <asm/proto.h> 24#include <asm/proto.h>
25#include <asm/pci-direct.h> 25#include <asm/pci-direct.h>
26#include <asm/dma.h> 26#include <asm/dma.h>
27#include <asm/k8.h>
27 28
28int iommu_aperture; 29int iommu_aperture;
29int iommu_aperture_disabled __initdata = 0; 30int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +38,6 @@ int fix_aperture __initdata = 1;
37/* This code runs before the PCI subsystem is initialized, so just 38/* This code runs before the PCI subsystem is initialized, so just
38 access the northbridge directly. */ 39 access the northbridge directly. */
39 40
40#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
41
42static u32 __init allocate_aperture(void) 41static u32 __init allocate_aperture(void)
43{ 42{
44 pg_data_t *nd0 = NODE_DATA(0); 43 pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +67,20 @@ static u32 __init allocate_aperture(void)
68 return (u32)__pa(p); 67 return (u32)__pa(p);
69} 68}
70 69
71static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 70static int __init aperture_valid(u64 aper_base, u32 aper_size)
72{ 71{
73 if (!aper_base) 72 if (!aper_base)
74 return 0; 73 return 0;
75 if (aper_size < 64*1024*1024) { 74 if (aper_size < 64*1024*1024) {
76 printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 75 printk("Aperture too small (%d MB)\n", aper_size>>20);
77 return 0; 76 return 0;
78 } 77 }
79 if (aper_base + aper_size >= 0xffffffff) { 78 if (aper_base + aper_size >= 0xffffffff) {
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 79 printk("Aperture beyond 4GB. Ignoring.\n");
81 return 0; 80 return 0;
82 } 81 }
83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 82 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 83 printk("Aperture pointing to e820 RAM. Ignoring.\n");
85 return 0; 84 return 0;
86 } 85 }
87 return 1; 86 return 1;
@@ -140,7 +139,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
140 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 139 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
141 aper, 32 << *order, apsizereg); 140 aper, 32 << *order, apsizereg);
142 141
143 if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) 142 if (!aperture_valid(aper, (32*1024*1024) << *order))
144 return 0; 143 return 0;
145 return (u32)aper; 144 return (u32)aper;
146} 145}
@@ -208,9 +207,8 @@ void __init iommu_hole_init(void)
208 207
209 fix = 0; 208 fix = 0;
210 for (num = 24; num < 32; num++) { 209 for (num = 24; num < 32; num++) {
211 char name[30]; 210 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
212 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 211 continue;
213 continue;
214 212
215 iommu_aperture = 1; 213 iommu_aperture = 1;
216 214
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
222 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 220 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
223 aper_base, aper_size>>20); 221 aper_base, aper_size>>20);
224 222
225 sprintf(name, "northbridge cpu %d", num-24); 223 if (!aperture_valid(aper_base, aper_size)) {
226
227 if (!aperture_valid(name, aper_base, aper_size)) {
228 fix = 1; 224 fix = 1;
229 break; 225 break;
230 } 226 }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
273 269
274 /* Fix up the north bridges */ 270 /* Fix up the north bridges */
275 for (num = 24; num < 32; num++) { 271 for (num = 24; num < 32; num++) {
276 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 272 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
277 continue; 273 continue;
278 274
279 /* Don't enable translation yet. That is done later. 275 /* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
1/*
2 * Shared support code for AMD K8 northbridges and derivates.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */
5#include <linux/gfp.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <asm/k8.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15
16static u32 *flush_words;
17
18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
21 {}
22};
23EXPORT_SYMBOL(k8_nb_ids);
24
25struct pci_dev **k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges);
27
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
29{
30 do {
31 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
32 if (!dev)
33 break;
34 } while (!pci_match_id(&k8_nb_ids[0], dev));
35 return dev;
36}
37
38int cache_k8_northbridges(void)
39{
40 int i;
41 struct pci_dev *dev;
42 if (num_k8_northbridges)
43 return 0;
44
45 num_k8_northbridges = 0;
46 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++;
49
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
51 GFP_KERNEL);
52 if (!k8_northbridges)
53 return -ENOMEM;
54
55 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
56 if (!flush_words) {
57 kfree(k8_northbridges);
58 return -ENOMEM;
59 }
60
61 dev = NULL;
62 i = 0;
63 while ((dev = next_k8_northbridge(dev)) != NULL) {
64 k8_northbridges[i++] = dev;
65 pci_read_config_dword(dev, 0x9c, &flush_words[i]);
66 }
67 k8_northbridges[i] = NULL;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(cache_k8_northbridges);
71
72/* Ignores subdevice/subvendor but as far as I can figure out
73 they're useless anyways */
74int __init early_is_k8_nb(u32 device)
75{
76 struct pci_device_id *id;
77 u32 vendor = device & 0xffff;
78 device >>= 16;
79 for (id = k8_nb_ids; id->vendor; id++)
80 if (vendor == id->vendor && device == id->device)
81 return 1;
82 return 0;
83}
84
85void k8_flush_garts(void)
86{
87 int flushed, i;
88 unsigned long flags;
89 static DEFINE_SPINLOCK(gart_lock);
90
91 /* Avoid races between AGP and IOMMU. In theory it's not needed
92 but I'm not sure if the hardware won't lose flush requests
93 when another is pending. This whole thing is so expensive anyways
94 that it doesn't matter to serialize more. -AK */
95 spin_lock_irqsave(&gart_lock, flags);
96 flushed = 0;
97 for (i = 0; i < num_k8_northbridges; i++) {
98 pci_write_config_dword(k8_northbridges[i], 0x9c,
99 flush_words[i]|1);
100 flushed++;
101 }
102 for (i = 0; i < num_k8_northbridges; i++) {
103 u32 w;
104 /* Make sure the hardware actually executed the flush*/
105 for (;;) {
106 pci_read_config_dword(k8_northbridges[i],
107 0x9c, &w);
108 if (!(w & 1))
109 break;
110 cpu_relax();
111 }
112 }
113 spin_unlock_irqrestore(&gart_lock, flags);
114 if (!flushed)
115 printk("nothing to flush?\n");
116}
117EXPORT_SYMBOL_GPL(k8_flush_garts);
118
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index ea8f4041794e..ded3af3bceec 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33#include <asm/swiotlb.h> 33#include <asm/swiotlb.h>
34#include <asm/dma.h> 34#include <asm/dma.h>
35#include <asm/k8.h>
35 36
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 37unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 38static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
46 also seen with Qlogic at least). */ 47 also seen with Qlogic at least). */
47int iommu_fullflush = 1; 48int iommu_fullflush = 1;
48 49
49#define MAX_NB 8
50
51/* Allocation bitmap for the remapping area */ 50/* Allocation bitmap for the remapping area */
52static DEFINE_SPINLOCK(iommu_bitmap_lock); 51static DEFINE_SPINLOCK(iommu_bitmap_lock);
53static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ 52static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
63#define to_pages(addr,size) \ 62#define to_pages(addr,size) \
64 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 63 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
65 64
66#define for_all_nb(dev) \
67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69
70static struct pci_dev *northbridges[MAX_NB];
71static u32 northbridge_flush_word[MAX_NB];
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 65#define EMERGENCY_PAGES 32 /* = 128KB */
74 66
75#ifdef CONFIG_AGP 67#ifdef CONFIG_AGP
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
120/* 112/*
121 * Use global flush state to avoid races with multiple flushers. 113 * Use global flush state to avoid races with multiple flushers.
122 */ 114 */
123static void flush_gart(struct device *dev) 115static void flush_gart(void)
124{ 116{
125 unsigned long flags; 117 unsigned long flags;
126 int flushed = 0;
127 int i, max;
128
129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 118 spin_lock_irqsave(&iommu_bitmap_lock, flags);
130 if (need_flush) { 119 if (need_flush) {
131 max = 0; 120 k8_flush_garts();
132 for (i = 0; i < MAX_NB; i++) {
133 if (!northbridges[i])
134 continue;
135 pci_write_config_dword(northbridges[i], 0x9c,
136 northbridge_flush_word[i] | 1);
137 flushed++;
138 max = i;
139 }
140 for (i = 0; i <= max; i++) {
141 u32 w;
142 if (!northbridges[i])
143 continue;
144 /* Make sure the hardware actually executed the flush. */
145 for (;;) {
146 pci_read_config_dword(northbridges[i], 0x9c, &w);
147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
151 }
152 if (!flushed)
153 printk("nothing to flush?\n");
154 need_flush = 0; 121 need_flush = 0;
155 } 122 }
156 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
157} 124}
158 125
159
160
161#ifdef CONFIG_IOMMU_LEAK 126#ifdef CONFIG_IOMMU_LEAK
162 127
163#define SET_LEAK(x) if (iommu_leak_tab) \ 128#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
266 size_t size, int dir) 231 size_t size, int dir)
267{ 232{
268 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 233 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
269 flush_gart(dev); 234 flush_gart();
270 return map; 235 return map;
271} 236}
272 237
@@ -351,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
351 s->dma_address = addr; 316 s->dma_address = addr;
352 s->dma_length = s->length; 317 s->dma_length = s->length;
353 } 318 }
354 flush_gart(dev); 319 flush_gart();
355 return nents; 320 return nents;
356} 321}
357 322
@@ -458,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
458 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) 423 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
459 goto error; 424 goto error;
460 out++; 425 out++;
461 flush_gart(dev); 426 flush_gart();
462 if (out < nents) 427 if (out < nents)
463 sg[out].dma_length = 0; 428 sg[out].dma_length = 0;
464 return out; 429 return out;
465 430
466error: 431error:
467 flush_gart(NULL); 432 flush_gart();
468 gart_unmap_sg(dev, sg, nents, dir); 433 gart_unmap_sg(dev, sg, nents, dir);
469 /* When it was forced or merged try again in a dumb way */ 434 /* When it was forced or merged try again in a dumb way */
470 if (force_iommu || iommu_merge) { 435 if (force_iommu || iommu_merge) {
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
532 void *gatt; 497 void *gatt;
533 unsigned aper_base, new_aper_base; 498 unsigned aper_base, new_aper_base;
534 unsigned aper_size, gatt_size, new_aper_size; 499 unsigned aper_size, gatt_size, new_aper_size;
535 500 int i;
501
536 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 502 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
537 aper_size = aper_base = info->aper_size = 0; 503 aper_size = aper_base = info->aper_size = 0;
538 for_all_nb(dev) { 504 dev = NULL;
505 for (i = 0; i < num_k8_northbridges; i++) {
506 dev = k8_northbridges[i];
539 new_aper_base = read_aperture(dev, &new_aper_size); 507 new_aper_base = read_aperture(dev, &new_aper_size);
540 if (!new_aper_base) 508 if (!new_aper_base)
541 goto nommu; 509 goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 panic("Cannot allocate GATT table"); 526 panic("Cannot allocate GATT table");
559 memset(gatt, 0, gatt_size); 527 memset(gatt, 0, gatt_size);
560 agp_gatt_table = gatt; 528 agp_gatt_table = gatt;
561 529
562 for_all_nb(dev) { 530 for (i = 0; i < num_k8_northbridges; i++) {
563 u32 ctl; 531 u32 ctl;
564 u32 gatt_reg; 532 u32 gatt_reg;
565 533
534 dev = k8_northbridges[i];
566 gatt_reg = __pa(gatt) >> 12; 535 gatt_reg = __pa(gatt) >> 12;
567 gatt_reg <<= 4; 536 gatt_reg <<= 4;
568 pci_write_config_dword(dev, 0x98, gatt_reg); 537 pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
573 542
574 pci_write_config_dword(dev, 0x90, ctl); 543 pci_write_config_dword(dev, 0x90, ctl);
575 } 544 }
576 flush_gart(NULL); 545 flush_gart();
577 546
578 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 547 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
579 return 0; 548 return 0;
@@ -607,10 +576,14 @@ static int __init pci_iommu_init(void)
607 struct agp_kern_info info; 576 struct agp_kern_info info;
608 unsigned long aper_size; 577 unsigned long aper_size;
609 unsigned long iommu_start; 578 unsigned long iommu_start;
610 struct pci_dev *dev;
611 unsigned long scratch; 579 unsigned long scratch;
612 long i; 580 long i;
613 581
582 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
583 printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
584 return -1;
585 }
586
614#ifndef CONFIG_AGP_AMD64 587#ifndef CONFIG_AGP_AMD64
615 no_agp = 1; 588 no_agp = 1;
616#else 589#else
@@ -637,14 +610,6 @@ static int __init pci_iommu_init(void)
637 return -1; 610 return -1;
638 } 611 }
639 612
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 }
647
648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 613 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
649 aper_size = info.aper_size * 1024 * 1024; 614 aper_size = info.aper_size * 1024 * 1024;
650 iommu_size = check_iommu_size(info.aper_base, aper_size); 615 iommu_size = check_iommu_size(info.aper_base, aper_size);
@@ -707,20 +672,8 @@ static int __init pci_iommu_init(void)
707 for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 672 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
708 iommu_gatt_base[i] = gart_unmapped_entry; 673 iommu_gatt_base[i] = gart_unmapped_entry;
709 674
710 for_all_nb(dev) { 675 flush_gart();
711 u32 flag;
712 int cpu = PCI_SLOT(dev->devfn) - 24;
713 if (cpu >= MAX_NB)
714 continue;
715 northbridges[cpu] = dev;
716 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
717 northbridge_flush_word[cpu] = flag;
718 }
719
720 flush_gart(NULL);
721
722 dma_ops = &gart_dma_ops; 676 dma_ops = &gart_dma_ops;
723
724 return 0; 677 return 0;
725} 678}
726 679