aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yinghai Lu <Yinghai.Lu@Sun.COM> 2008-02-19 06:20:09 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-04-26 17:41:04 -0400
commit 871d5f8dd0f7647f03facd4cb79485938d1b61ab (patch)
tree b08eee02ddd7b4bdb9dfde2637f5154e409cdacc
parent bb63b4219976d48ed6d22ac33c18be334fb5a78c (diff)
x86: get mp_bus_to_node early
Currently, on an AMD K8 system with multiple HT chains, the numa_node of pci devices under /sys/devices/pci0000:80/* is always 0, even if that chain is on node 1 or 2 or 3. Workaround: pcibus_to_node(bus) is used when we want to get the node that pci_device is on. In struct device, we already have a numa_node member, and we could use dev_to_node()/set_dev_node() to get and set numa_node in the device. set_dev_node is called in pci_device_add() with pcibus_to_node(bus), and pcibus_to_node uses bus->sysdata for nodeid. The problem is that when pci_device_add is called, bus->sysdata is not assigned the correct nodeid yet. The result is that numa_node will always be 0. pcibios_scan_root and pci_scan_root could take sysdata. So we need to get the mp_bus_to_node mapping before these two are called, and thus get_mp_bus_to_node could get the correct node for sysdata in the root bus. In scanning of the root bus, all child busses will take the parent bus sysdata. So all pci_device->dev.numa_node will be assigned correctly and automatically. Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we could also make other bus-specific devices get the correct numa_node too. This is an updated version of pci_sysdata and Jeff's pci_domain patch. [ mingo@elte.hu: build fix ] Signed-off-by: Yinghai Lu <yinghai.lu@sun.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/pci/Makefile_32 1
-rw-r--r-- arch/x86/pci/acpi.c 27
-rw-r--r-- arch/x86/pci/common.c 18
-rw-r--r-- arch/x86/pci/irq.c 4
-rw-r--r-- arch/x86/pci/k8-bus_64.c 92
-rw-r--r-- arch/x86/pci/legacy.c 4
-rw-r--r-- arch/x86/pci/mp_bus_to_node.c 23
-rw-r--r-- include/asm-x86/pci.h 2
-rw-r--r-- include/asm-x86/topology.h 13
9 files changed, 142 insertions, 42 deletions
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index cdd6828b5abb..e9c5caf54e59 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -10,5 +10,6 @@ pci-y += legacy.o irq.o
10 10
11pci-$(CONFIG_X86_VISWS) := visws.o fixup.o 11pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
12pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o 12pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
13pci-$(CONFIG_NUMA) += mp_bus_to_node.o
13 14
14obj-y += $(pci-y) common.o early.o 15obj-y += $(pci-y) common.o early.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 2664cb3fc96c..1a9c0c6a1a18 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
191{ 191{
192 struct pci_bus *bus; 192 struct pci_bus *bus;
193 struct pci_sysdata *sd; 193 struct pci_sysdata *sd;
194 int node;
195#ifdef CONFIG_ACPI_NUMA
194 int pxm; 196 int pxm;
197#endif
195 198
196 dmi_check_system(acpi_pciprobe_dmi_table); 199 dmi_check_system(acpi_pciprobe_dmi_table);
197 200
@@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
201 return NULL; 204 return NULL;
202 } 205 }
203 206
207 node = -1;
208#ifdef CONFIG_ACPI_NUMA
209 pxm = acpi_get_pxm(device->handle);
210 if (pxm >= 0)
211 node = pxm_to_node(pxm);
212 if (node != -1)
213 set_mp_bus_to_node(busnum, node);
214 else
215 node = get_mp_bus_to_node(busnum);
216#endif
217
204 /* Allocate per-root-bus (not per bus) arch-specific data. 218 /* Allocate per-root-bus (not per bus) arch-specific data.
205 * TODO: leak; this memory is never freed. 219 * TODO: leak; this memory is never freed.
206 * It's arguable whether it's worth the trouble to care. 220 * It's arguable whether it's worth the trouble to care.
@@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
212 } 226 }
213 227
214 sd->domain = domain; 228 sd->domain = domain;
215 sd->node = -1; 229 sd->node = node;
216
217 pxm = acpi_get_pxm(device->handle);
218#ifdef CONFIG_ACPI_NUMA
219 if (pxm >= 0)
220 sd->node = pxm_to_node(pxm);
221#endif
222 /* 230 /*
223 * Maybe the desired pci bus has been already scanned. In such case 231 * Maybe the desired pci bus has been already scanned. In such case
224 * it is unnecessary to scan the pci bus with the given domain,busnum. 232 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
238 kfree(sd); 246 kfree(sd);
239 247
240#ifdef CONFIG_ACPI_NUMA 248#ifdef CONFIG_ACPI_NUMA
241 if (bus != NULL) { 249 if (bus) {
242 if (pxm >= 0) { 250 if (pxm >= 0) {
243 printk("bus %d -> pxm %d -> node %d\n", 251 printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
244 busnum, pxm, pxm_to_node(pxm)); 252 busnum, pxm, pxm_to_node(pxm));
245 } 253 }
246 } 254 }
@@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
248 256
249 if (bus && (pci_probe & PCI_USE__CRS)) 257 if (bus && (pci_probe & PCI_USE__CRS))
250 get_current_resources(device, busnum, domain, bus); 258 get_current_resources(device, busnum, domain, bus);
251
252 return bus; 259 return bus;
253} 260}
254 261
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 75fcc29ecf52..07d53184f7a4 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
342 return NULL; 342 return NULL;
343 } 343 }
344 344
345 sd->node = get_mp_bus_to_node(busnum);
346
345 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); 347 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
348 bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
349 if (!bus)
350 kfree(sd);
346 351
347 return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); 352 return bus;
348} 353}
349 354
350extern u8 pci_cache_line_size; 355extern u8 pci_cache_line_size;
@@ -480,7 +485,7 @@ void pcibios_disable_device (struct pci_dev *dev)
480 pcibios_disable_irq(dev); 485 pcibios_disable_irq(dev);
481} 486}
482 487
483struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) 488struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
484{ 489{
485 struct pci_bus *bus = NULL; 490 struct pci_bus *bus = NULL;
486 struct pci_sysdata *sd; 491 struct pci_sysdata *sd;
@@ -495,10 +500,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno)
495 printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); 500 printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno);
496 return NULL; 501 return NULL;
497 } 502 }
498 sd->node = -1; 503 sd->node = node;
499 bus = pci_scan_bus(busno, &pci_root_ops, sd); 504 bus = pci_scan_bus(busno, ops, sd);
500 if (!bus) 505 if (!bus)
501 kfree(sd); 506 kfree(sd);
502 507
503 return bus; 508 return bus;
504} 509}
510
511struct pci_bus *pci_scan_bus_with_sysdata(int busno)
512{
513 return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
514}
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 579745ca6b66..0908fca901bf 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void)
136 busmap[e->bus] = 1; 136 busmap[e->bus] = 1;
137 } 137 }
138 for(i = 1; i < 256; i++) { 138 for(i = 1; i < 256; i++) {
139 int node;
139 if (!busmap[i] || pci_find_bus(0, i)) 140 if (!busmap[i] || pci_find_bus(0, i))
140 continue; 141 continue;
141 if (pci_scan_bus_with_sysdata(i)) 142 node = get_mp_bus_to_node(i);
143 if (pci_scan_bus_on_node(i, &pci_root_ops, node))
142 printk(KERN_INFO "PCI: Discovered primary peer " 144 printk(KERN_INFO "PCI: Discovered primary peer "
143 "bus %02x [IRQ]\n", i); 145 "bus %02x [IRQ]\n", i);
144 } 146 }
diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c
index 9cc813e29706..3903efbca535 100644
--- a/arch/x86/pci/k8-bus_64.c
+++ b/arch/x86/pci/k8-bus_64.c
@@ -1,7 +1,9 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <asm/pci-direct.h>
3#include <asm/mpspec.h> 4#include <asm/mpspec.h>
4#include <linux/cpumask.h> 5#include <linux/cpumask.h>
6#include <linux/topology.h>
5 7
6/* 8/*
7 * This discovers the pcibus <-> node mapping on AMD K8. 9 * This discovers the pcibus <-> node mapping on AMD K8.
@@ -20,64 +22,102 @@
20#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) 22#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
21#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 23#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
22 24
25#ifdef CONFIG_NUMA
26
27#define BUS_NR 256
28
29static int mp_bus_to_node[BUS_NR];
30
31void set_mp_bus_to_node(int busnum, int node)
32{
33 if (busnum >= 0 && busnum < BUS_NR)
34 mp_bus_to_node[busnum] = node;
35}
36
37int get_mp_bus_to_node(int busnum)
38{
39 int node = -1;
40
41 if (busnum < 0 || busnum > (BUS_NR - 1))
42 return node;
43
44 node = mp_bus_to_node[busnum];
45
46 /*
47 * let numa_node_id to decide it later in dma_alloc_pages
48 * if there is no ram on that node
49 */
50 if (node != -1 && !node_online(node))
51 node = -1;
52
53 return node;
54}
55
56#endif
57
23/** 58/**
24 * fill_mp_bus_to_cpumask() 59 * early_fill_mp_bus_to_node()
60 * called before pcibios_scan_root and pci_scan_bus
25 * fills the mp_bus_to_cpumask array based according to the LDT Bus Number 61 * fills the mp_bus_to_cpumask array based according to the LDT Bus Number
26 * Registers found in the K8 northbridge 62 * Registers found in the K8 northbridge
27 */ 63 */
28__init static int 64__init static int
29fill_mp_bus_to_cpumask(void) 65early_fill_mp_bus_to_node(void)
30{ 66{
31 struct pci_dev *nb_dev = NULL; 67#ifdef CONFIG_NUMA
32 int i, j; 68 int i, j;
69 unsigned slot;
33 u32 ldtbus, nid; 70 u32 ldtbus, nid;
71 u32 id;
34 static int lbnr[3] = { 72 static int lbnr[3] = {
35 LDT_BUS_NUMBER_REGISTER_0, 73 LDT_BUS_NUMBER_REGISTER_0,
36 LDT_BUS_NUMBER_REGISTER_1, 74 LDT_BUS_NUMBER_REGISTER_1,
37 LDT_BUS_NUMBER_REGISTER_2 75 LDT_BUS_NUMBER_REGISTER_2
38 }; 76 };
39 77
40 while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 78 for (i = 0; i < BUS_NR; i++)
41 PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { 79 mp_bus_to_node[i] = -1;
42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); 80
81 if (!early_pci_allowed())
82 return -1;
83
84 for (slot = 0x18; slot < 0x20; slot++) {
85 id = read_pci_config(0, slot, 0, PCI_VENDOR_ID);
86 if (id != (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_K8HTCONFIG<<16)))
87 break;
88 nid = read_pci_config(0, slot, 0, NODE_ID_REGISTER);
43 89
44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { 90 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
45 pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); 91 ldtbus = read_pci_config(0, slot, 0, lbnr[i]);
46 /* 92 /*
47 * if there are no busses hanging off of the current 93 * if there are no busses hanging off of the current
48 * ldt link then both the secondary and subordinate 94 * ldt link then both the secondary and subordinate
49 * bus number fields are set to 0. 95 * bus number fields are set to 0.
50 * 96 *
51 * RED-PEN 97 * RED-PEN
52 * This is slightly broken because it assumes 98 * This is slightly broken because it assumes
53 * HT node IDs == Linux node ids, which is not always 99 * HT node IDs == Linux node ids, which is not always
54 * true. However it is probably mostly true. 100 * true. However it is probably mostly true.
55 */ 101 */
56 if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 102 if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0
57 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { 103 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) {
58 for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); 104 for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
59 j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); 105 j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
60 j++) { 106 j++) {
61 struct pci_bus *bus; 107 int node = NODE_ID(nid);
62 struct pci_sysdata *sd; 108 mp_bus_to_node[j] = (unsigned char)node;
63 109 }
64 long node = NODE_ID(nid);
65 /* Algorithm a bit dumb, but
66 it shouldn't matter here */
67 bus = pci_find_bus(0, j);
68 if (!bus)
69 continue;
70 if (!node_online(node))
71 node = 0;
72
73 sd = bus->sysdata;
74 sd->node = node;
75 }
76 } 110 }
77 } 111 }
78 } 112 }
79 113
114 for (i = 0; i < BUS_NR; i++) {
115 int node = mp_bus_to_node[i];
116 if (node >= 0)
117 printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node);
118 }
119#endif
80 return 0; 120 return 0;
81} 121}
82 122
83fs_initcall(fill_mp_bus_to_cpumask); 123postcore_initcall(early_fill_mp_bus_to_node);
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index e041ced0ce13..a67921ce60af 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -12,6 +12,7 @@
12static void __devinit pcibios_fixup_peer_bridges(void) 12static void __devinit pcibios_fixup_peer_bridges(void)
13{ 13{
14 int n, devfn; 14 int n, devfn;
15 long node;
15 16
16 if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) 17 if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
17 return; 18 return;
@@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void)
21 u32 l; 22 u32 l;
22 if (pci_find_bus(0, n)) 23 if (pci_find_bus(0, n))
23 continue; 24 continue;
25 node = get_mp_bus_to_node(n);
24 for (devfn = 0; devfn < 256; devfn += 8) { 26 for (devfn = 0; devfn < 256; devfn += 8) {
25 if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && 27 if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
26 l != 0x0000 && l != 0xffff) { 28 l != 0x0000 && l != 0xffff) {
27 DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); 29 DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
28 printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); 30 printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
29 pci_scan_bus_with_sysdata(n); 31 pci_scan_bus_on_node(n, &pci_root_ops, node);
30 break; 32 break;
31 } 33 }
32 } 34 }
diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c
new file mode 100644
index 000000000000..022943999b84
--- /dev/null
+++ b/arch/x86/pci/mp_bus_to_node.c
@@ -0,0 +1,23 @@
1#include <linux/pci.h>
2#include <linux/init.h>
3#include <linux/topology.h>
4
5#define BUS_NR 256
6
7static unsigned char mp_bus_to_node[BUS_NR];
8
9void set_mp_bus_to_node(int busnum, int node)
10{
11 if (busnum >= 0 && busnum < BUS_NR)
12 mp_bus_to_node[busnum] = (unsigned char) node;
13}
14
15int get_mp_bus_to_node(int busnum)
16{
17 int node;
18
19 if (busnum < 0 || busnum > (BUS_NR - 1))
20 return 0;
21 node = mp_bus_to_node[busnum];
22 return node;
23}
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index ddd8e248fc0a..30bbde0cb34b 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -19,6 +19,8 @@ struct pci_sysdata {
19}; 19};
20 20
21/* scan a bus after allocating a pci_sysdata for it */ 21/* scan a bus after allocating a pci_sysdata for it */
22extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
23 int node);
22extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); 24extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
23 25
24static inline int pci_domain_nr(struct pci_bus *bus) 26static inline int pci_domain_nr(struct pci_bus *bus)
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 22073268b481..4793ae745a78 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -198,4 +198,17 @@ extern cpumask_t cpu_coregroup_map(int cpu);
198#define smt_capable() (smp_num_siblings > 1) 198#define smt_capable() (smp_num_siblings > 1)
199#endif 199#endif
200 200
201#ifdef CONFIG_NUMA
202extern int get_mp_bus_to_node(int busnum);
203extern void set_mp_bus_to_node(int busnum, int node);
204#else
205static inline int get_mp_bus_to_node(int busnum)
206{
207 return 0;
208}
209static inline void set_mp_bus_to_node(int busnum, int node)
210{
211}
212#endif
213
201#endif 214#endif