diff options
author | Yinghai Lu <Yinghai.Lu@Sun.COM> | 2008-02-19 06:20:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 17:41:04 -0400 |
commit | 871d5f8dd0f7647f03facd4cb79485938d1b61ab (patch) | |
tree | b08eee02ddd7b4bdb9dfde2637f5154e409cdacc /arch/x86/pci | |
parent | bb63b4219976d48ed6d22ac33c18be334fb5a78c (diff) |
x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.
Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.
In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.
The problem is that when pci_device_add() is called, bus->sysdata has not
yet been assigned the correct nodeid. The result is that numa_node will always be 0.
pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.
In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.
Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also make other bus-specific devices get the correct numa_node
too.
This is an updated version of pci_sysdata and Jeff's pci_domain patch.
[ mingo@elte.hu: build fix ]
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/pci')
-rw-r--r-- | arch/x86/pci/Makefile_32 | 1 | ||||
-rw-r--r-- | arch/x86/pci/acpi.c | 27 | ||||
-rw-r--r-- | arch/x86/pci/common.c | 18 | ||||
-rw-r--r-- | arch/x86/pci/irq.c | 4 | ||||
-rw-r--r-- | arch/x86/pci/k8-bus_64.c | 92 | ||||
-rw-r--r-- | arch/x86/pci/legacy.c | 4 | ||||
-rw-r--r-- | arch/x86/pci/mp_bus_to_node.c | 23 |
7 files changed, 127 insertions, 42 deletions
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index cdd6828b5abb..e9c5caf54e59 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -10,5 +10,6 @@ pci-y += legacy.o irq.o | |||
10 | 10 | ||
11 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | 11 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o |
12 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | 12 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o |
13 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | ||
13 | 14 | ||
14 | obj-y += $(pci-y) common.o early.o | 15 | obj-y += $(pci-y) common.o early.o |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2664cb3fc96c..1a9c0c6a1a18 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
191 | { | 191 | { |
192 | struct pci_bus *bus; | 192 | struct pci_bus *bus; |
193 | struct pci_sysdata *sd; | 193 | struct pci_sysdata *sd; |
194 | int node; | ||
195 | #ifdef CONFIG_ACPI_NUMA | ||
194 | int pxm; | 196 | int pxm; |
197 | #endif | ||
195 | 198 | ||
196 | dmi_check_system(acpi_pciprobe_dmi_table); | 199 | dmi_check_system(acpi_pciprobe_dmi_table); |
197 | 200 | ||
@@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
201 | return NULL; | 204 | return NULL; |
202 | } | 205 | } |
203 | 206 | ||
207 | node = -1; | ||
208 | #ifdef CONFIG_ACPI_NUMA | ||
209 | pxm = acpi_get_pxm(device->handle); | ||
210 | if (pxm >= 0) | ||
211 | node = pxm_to_node(pxm); | ||
212 | if (node != -1) | ||
213 | set_mp_bus_to_node(busnum, node); | ||
214 | else | ||
215 | node = get_mp_bus_to_node(busnum); | ||
216 | #endif | ||
217 | |||
204 | /* Allocate per-root-bus (not per bus) arch-specific data. | 218 | /* Allocate per-root-bus (not per bus) arch-specific data. |
205 | * TODO: leak; this memory is never freed. | 219 | * TODO: leak; this memory is never freed. |
206 | * It's arguable whether it's worth the trouble to care. | 220 | * It's arguable whether it's worth the trouble to care. |
@@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
212 | } | 226 | } |
213 | 227 | ||
214 | sd->domain = domain; | 228 | sd->domain = domain; |
215 | sd->node = -1; | 229 | sd->node = node; |
216 | |||
217 | pxm = acpi_get_pxm(device->handle); | ||
218 | #ifdef CONFIG_ACPI_NUMA | ||
219 | if (pxm >= 0) | ||
220 | sd->node = pxm_to_node(pxm); | ||
221 | #endif | ||
222 | /* | 230 | /* |
223 | * Maybe the desired pci bus has been already scanned. In such case | 231 | * Maybe the desired pci bus has been already scanned. In such case |
224 | * it is unnecessary to scan the pci bus with the given domain,busnum. | 232 | * it is unnecessary to scan the pci bus with the given domain,busnum. |
@@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
238 | kfree(sd); | 246 | kfree(sd); |
239 | 247 | ||
240 | #ifdef CONFIG_ACPI_NUMA | 248 | #ifdef CONFIG_ACPI_NUMA |
241 | if (bus != NULL) { | 249 | if (bus) { |
242 | if (pxm >= 0) { | 250 | if (pxm >= 0) { |
243 | printk("bus %d -> pxm %d -> node %d\n", | 251 | printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", |
244 | busnum, pxm, pxm_to_node(pxm)); | 252 | busnum, pxm, pxm_to_node(pxm)); |
245 | } | 253 | } |
246 | } | 254 | } |
@@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
248 | 256 | ||
249 | if (bus && (pci_probe & PCI_USE__CRS)) | 257 | if (bus && (pci_probe & PCI_USE__CRS)) |
250 | get_current_resources(device, busnum, domain, bus); | 258 | get_current_resources(device, busnum, domain, bus); |
251 | |||
252 | return bus; | 259 | return bus; |
253 | } | 260 | } |
254 | 261 | ||
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 75fcc29ecf52..07d53184f7a4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) | |||
342 | return NULL; | 342 | return NULL; |
343 | } | 343 | } |
344 | 344 | ||
345 | sd->node = get_mp_bus_to_node(busnum); | ||
346 | |||
345 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); | 347 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); |
348 | bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); | ||
349 | if (!bus) | ||
350 | kfree(sd); | ||
346 | 351 | ||
347 | return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); | 352 | return bus; |
348 | } | 353 | } |
349 | 354 | ||
350 | extern u8 pci_cache_line_size; | 355 | extern u8 pci_cache_line_size; |
@@ -480,7 +485,7 @@ void pcibios_disable_device (struct pci_dev *dev) | |||
480 | pcibios_disable_irq(dev); | 485 | pcibios_disable_irq(dev); |
481 | } | 486 | } |
482 | 487 | ||
483 | struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) | 488 | struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) |
484 | { | 489 | { |
485 | struct pci_bus *bus = NULL; | 490 | struct pci_bus *bus = NULL; |
486 | struct pci_sysdata *sd; | 491 | struct pci_sysdata *sd; |
@@ -495,10 +500,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) | |||
495 | printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); | 500 | printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); |
496 | return NULL; | 501 | return NULL; |
497 | } | 502 | } |
498 | sd->node = -1; | 503 | sd->node = node; |
499 | bus = pci_scan_bus(busno, &pci_root_ops, sd); | 504 | bus = pci_scan_bus(busno, ops, sd); |
500 | if (!bus) | 505 | if (!bus) |
501 | kfree(sd); | 506 | kfree(sd); |
502 | 507 | ||
503 | return bus; | 508 | return bus; |
504 | } | 509 | } |
510 | |||
511 | struct pci_bus *pci_scan_bus_with_sysdata(int busno) | ||
512 | { | ||
513 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); | ||
514 | } | ||
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 579745ca6b66..0908fca901bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void) | |||
136 | busmap[e->bus] = 1; | 136 | busmap[e->bus] = 1; |
137 | } | 137 | } |
138 | for(i = 1; i < 256; i++) { | 138 | for(i = 1; i < 256; i++) { |
139 | int node; | ||
139 | if (!busmap[i] || pci_find_bus(0, i)) | 140 | if (!busmap[i] || pci_find_bus(0, i)) |
140 | continue; | 141 | continue; |
141 | if (pci_scan_bus_with_sysdata(i)) | 142 | node = get_mp_bus_to_node(i); |
143 | if (pci_scan_bus_on_node(i, &pci_root_ops, node)) | ||
142 | printk(KERN_INFO "PCI: Discovered primary peer " | 144 | printk(KERN_INFO "PCI: Discovered primary peer " |
143 | "bus %02x [IRQ]\n", i); | 145 | "bus %02x [IRQ]\n", i); |
144 | } | 146 | } |
diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 9cc813e29706..3903efbca535 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c | |||
@@ -1,7 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <asm/pci-direct.h> | ||
3 | #include <asm/mpspec.h> | 4 | #include <asm/mpspec.h> |
4 | #include <linux/cpumask.h> | 5 | #include <linux/cpumask.h> |
6 | #include <linux/topology.h> | ||
5 | 7 | ||
6 | /* | 8 | /* |
7 | * This discovers the pcibus <-> node mapping on AMD K8. | 9 | * This discovers the pcibus <-> node mapping on AMD K8. |
@@ -20,64 +22,102 @@ | |||
20 | #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) | 22 | #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) |
21 | #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 | 23 | #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 |
22 | 24 | ||
25 | #ifdef CONFIG_NUMA | ||
26 | |||
27 | #define BUS_NR 256 | ||
28 | |||
29 | static int mp_bus_to_node[BUS_NR]; | ||
30 | |||
31 | void set_mp_bus_to_node(int busnum, int node) | ||
32 | { | ||
33 | if (busnum >= 0 && busnum < BUS_NR) | ||
34 | mp_bus_to_node[busnum] = node; | ||
35 | } | ||
36 | |||
37 | int get_mp_bus_to_node(int busnum) | ||
38 | { | ||
39 | int node = -1; | ||
40 | |||
41 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
42 | return node; | ||
43 | |||
44 | node = mp_bus_to_node[busnum]; | ||
45 | |||
46 | /* | ||
47 | * let numa_node_id to decide it later in dma_alloc_pages | ||
48 | * if there is no ram on that node | ||
49 | */ | ||
50 | if (node != -1 && !node_online(node)) | ||
51 | node = -1; | ||
52 | |||
53 | return node; | ||
54 | } | ||
55 | |||
56 | #endif | ||
57 | |||
23 | /** | 58 | /** |
24 | * fill_mp_bus_to_cpumask() | 59 | * early_fill_mp_bus_to_node() |
60 | * called before pcibios_scan_root and pci_scan_bus | ||
25 | * fills the mp_bus_to_cpumask array based according to the LDT Bus Number | 61 | * fills the mp_bus_to_cpumask array based according to the LDT Bus Number |
26 | * Registers found in the K8 northbridge | 62 | * Registers found in the K8 northbridge |
27 | */ | 63 | */ |
28 | __init static int | 64 | __init static int |
29 | fill_mp_bus_to_cpumask(void) | 65 | early_fill_mp_bus_to_node(void) |
30 | { | 66 | { |
31 | struct pci_dev *nb_dev = NULL; | 67 | #ifdef CONFIG_NUMA |
32 | int i, j; | 68 | int i, j; |
69 | unsigned slot; | ||
33 | u32 ldtbus, nid; | 70 | u32 ldtbus, nid; |
71 | u32 id; | ||
34 | static int lbnr[3] = { | 72 | static int lbnr[3] = { |
35 | LDT_BUS_NUMBER_REGISTER_0, | 73 | LDT_BUS_NUMBER_REGISTER_0, |
36 | LDT_BUS_NUMBER_REGISTER_1, | 74 | LDT_BUS_NUMBER_REGISTER_1, |
37 | LDT_BUS_NUMBER_REGISTER_2 | 75 | LDT_BUS_NUMBER_REGISTER_2 |
38 | }; | 76 | }; |
39 | 77 | ||
40 | while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, | 78 | for (i = 0; i < BUS_NR; i++) |
41 | PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { | 79 | mp_bus_to_node[i] = -1; |
42 | pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); | 80 | |
81 | if (!early_pci_allowed()) | ||
82 | return -1; | ||
83 | |||
84 | for (slot = 0x18; slot < 0x20; slot++) { | ||
85 | id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); | ||
86 | if (id != (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_K8HTCONFIG<<16))) | ||
87 | break; | ||
88 | nid = read_pci_config(0, slot, 0, NODE_ID_REGISTER); | ||
43 | 89 | ||
44 | for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { | 90 | for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { |
45 | pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); | 91 | ldtbus = read_pci_config(0, slot, 0, lbnr[i]); |
46 | /* | 92 | /* |
47 | * if there are no busses hanging off of the current | 93 | * if there are no busses hanging off of the current |
48 | * ldt link then both the secondary and subordinate | 94 | * ldt link then both the secondary and subordinate |
49 | * bus number fields are set to 0. | 95 | * bus number fields are set to 0. |
50 | * | 96 | * |
51 | * RED-PEN | 97 | * RED-PEN |
52 | * This is slightly broken because it assumes | 98 | * This is slightly broken because it assumes |
53 | * HT node IDs == Linux node ids, which is not always | 99 | * HT node IDs == Linux node ids, which is not always |
54 | * true. However it is probably mostly true. | 100 | * true. However it is probably mostly true. |
55 | */ | 101 | */ |
56 | if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 | 102 | if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 |
57 | && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { | 103 | && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { |
58 | for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); | 104 | for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); |
59 | j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); | 105 | j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); |
60 | j++) { | 106 | j++) { |
61 | struct pci_bus *bus; | 107 | int node = NODE_ID(nid); |
62 | struct pci_sysdata *sd; | 108 | mp_bus_to_node[j] = (unsigned char)node; |
63 | 109 | } | |
64 | long node = NODE_ID(nid); | ||
65 | /* Algorithm a bit dumb, but | ||
66 | it shouldn't matter here */ | ||
67 | bus = pci_find_bus(0, j); | ||
68 | if (!bus) | ||
69 | continue; | ||
70 | if (!node_online(node)) | ||
71 | node = 0; | ||
72 | |||
73 | sd = bus->sysdata; | ||
74 | sd->node = node; | ||
75 | } | ||
76 | } | 110 | } |
77 | } | 111 | } |
78 | } | 112 | } |
79 | 113 | ||
114 | for (i = 0; i < BUS_NR; i++) { | ||
115 | int node = mp_bus_to_node[i]; | ||
116 | if (node >= 0) | ||
117 | printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); | ||
118 | } | ||
119 | #endif | ||
80 | return 0; | 120 | return 0; |
81 | } | 121 | } |
82 | 122 | ||
83 | fs_initcall(fill_mp_bus_to_cpumask); | 123 | postcore_initcall(early_fill_mp_bus_to_node); |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index e041ced0ce13..a67921ce60af 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -12,6 +12,7 @@ | |||
12 | static void __devinit pcibios_fixup_peer_bridges(void) | 12 | static void __devinit pcibios_fixup_peer_bridges(void) |
13 | { | 13 | { |
14 | int n, devfn; | 14 | int n, devfn; |
15 | long node; | ||
15 | 16 | ||
16 | if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) | 17 | if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) |
17 | return; | 18 | return; |
@@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) | |||
21 | u32 l; | 22 | u32 l; |
22 | if (pci_find_bus(0, n)) | 23 | if (pci_find_bus(0, n)) |
23 | continue; | 24 | continue; |
25 | node = get_mp_bus_to_node(n); | ||
24 | for (devfn = 0; devfn < 256; devfn += 8) { | 26 | for (devfn = 0; devfn < 256; devfn += 8) { |
25 | if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && | 27 | if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && |
26 | l != 0x0000 && l != 0xffff) { | 28 | l != 0x0000 && l != 0xffff) { |
27 | DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); | 29 | DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); |
28 | printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); | 30 | printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); |
29 | pci_scan_bus_with_sysdata(n); | 31 | pci_scan_bus_on_node(n, &pci_root_ops, node); |
30 | break; | 32 | break; |
31 | } | 33 | } |
32 | } | 34 | } |
diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c new file mode 100644 index 000000000000..022943999b84 --- /dev/null +++ b/arch/x86/pci/mp_bus_to_node.c | |||
@@ -0,0 +1,23 @@ | |||
1 | #include <linux/pci.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/topology.h> | ||
4 | |||
5 | #define BUS_NR 256 | ||
6 | |||
7 | static unsigned char mp_bus_to_node[BUS_NR]; | ||
8 | |||
9 | void set_mp_bus_to_node(int busnum, int node) | ||
10 | { | ||
11 | if (busnum >= 0 && busnum < BUS_NR) | ||
12 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
13 | } | ||
14 | |||
15 | int get_mp_bus_to_node(int busnum) | ||
16 | { | ||
17 | int node; | ||
18 | |||
19 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
20 | return 0; | ||
21 | node = mp_bus_to_node[busnum]; | ||
22 | return node; | ||
23 | } | ||