aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJesse Barnes <jbarnes@virtuousgeek.org>2009-07-10 17:04:30 -0400
committerJesse Barnes <jbarnes@virtuousgeek.org>2009-09-09 16:29:21 -0400
commit2547089ca2db132e307ef68848ba029a8ec2f341 (patch)
treedb0079b322f964287571644c8376eb49bfcc68b7
parenteaa959df299157e2640fcb3321537501b6afd9e6 (diff)
x86/PCI: initialize PCI bus node numbers early
The current mp_bus_to_node array is initialized only by AMD specific code, since AMD platforms have registers that can be used for determining node numbers. On new Intel platforms it's necessary to initialize this array as well, though; otherwise all PCI node numbers will be 0, when in fact they should be -1 (indicating that I/O isn't tied to any particular node). So move the mp_bus_to_node code into the common PCI code, and initialize it early with a default value of -1. This may be overridden later by arch code (e.g. the AMD code). With this change, PCI consistent memory and other node specific allocations (e.g. skbuff allocs) should occur on the "current" node. If, for performance reasons, applications want to be bound to specific nodes, they should open their devices only after being pinned to the CPU where they'll run, for maximum locality. Acked-by: Yinghai Lu <yinghai@kernel.org> Tested-by: Jesse Brandeburg <jesse.brandeburg@gmail.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-rw-r--r--arch/x86/pci/amd_bus.c64
-rw-r--r--arch/x86/pci/common.c69
2 files changed, 70 insertions, 63 deletions
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 3ffa10df20b9..572ee9782f2a 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -15,63 +15,6 @@
15 * also get peer root bus resource for io,mmio 15 * also get peer root bus resource for io,mmio
16 */ 16 */
17 17
18#ifdef CONFIG_NUMA
19
20#define BUS_NR 256
21
22#ifdef CONFIG_X86_64
23
24static int mp_bus_to_node[BUS_NR];
25
26void set_mp_bus_to_node(int busnum, int node)
27{
28 if (busnum >= 0 && busnum < BUS_NR)
29 mp_bus_to_node[busnum] = node;
30}
31
32int get_mp_bus_to_node(int busnum)
33{
34 int node = -1;
35
36 if (busnum < 0 || busnum > (BUS_NR - 1))
37 return node;
38
39 node = mp_bus_to_node[busnum];
40
41 /*
42 * let numa_node_id to decide it later in dma_alloc_pages
43 * if there is no ram on that node
44 */
45 if (node != -1 && !node_online(node))
46 node = -1;
47
48 return node;
49}
50
51#else /* CONFIG_X86_32 */
52
53static unsigned char mp_bus_to_node[BUS_NR];
54
55void set_mp_bus_to_node(int busnum, int node)
56{
57 if (busnum >= 0 && busnum < BUS_NR)
58 mp_bus_to_node[busnum] = (unsigned char) node;
59}
60
61int get_mp_bus_to_node(int busnum)
62{
63 int node;
64
65 if (busnum < 0 || busnum > (BUS_NR - 1))
66 return 0;
67 node = mp_bus_to_node[busnum];
68 return node;
69}
70
71#endif /* CONFIG_X86_32 */
72
73#endif /* CONFIG_NUMA */
74
75#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
76 19
77/* 20/*
@@ -301,11 +244,6 @@ static int __init early_fill_mp_bus_info(void)
301 u64 val; 244 u64 val;
302 u32 address; 245 u32 address;
303 246
304#ifdef CONFIG_NUMA
305 for (i = 0; i < BUS_NR; i++)
306 mp_bus_to_node[i] = -1;
307#endif
308
309 if (!early_pci_allowed()) 247 if (!early_pci_allowed())
310 return -1; 248 return -1;
311 249
@@ -346,7 +284,7 @@ static int __init early_fill_mp_bus_info(void)
346 node = (reg >> 4) & 0x07; 284 node = (reg >> 4) & 0x07;
347#ifdef CONFIG_NUMA 285#ifdef CONFIG_NUMA
348 for (j = min_bus; j <= max_bus; j++) 286 for (j = min_bus; j <= max_bus; j++)
349 mp_bus_to_node[j] = (unsigned char) node; 287 set_mp_bus_to_node(j, node);
350#endif 288#endif
351 link = (reg >> 8) & 0x03; 289 link = (reg >> 8) & 0x03;
352 290
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2202b6257b82..5db96d4304de 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -600,3 +600,72 @@ struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
600{ 600{
601 return pci_scan_bus_on_node(busno, &pci_root_ops, -1); 601 return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
602} 602}
603
604/*
605 * NUMA info for PCI busses
606 *
607 * Early arch code is responsible for filling in reasonable values here.
608 * A node id of "-1" means "use current node". In other words, if a bus
609 * has a -1 node id, it's not tightly coupled to any particular chunk
610 * of memory (as is the case on some Nehalem systems).
611 */
612#ifdef CONFIG_NUMA
613
614#define BUS_NR 256
615
616#ifdef CONFIG_X86_64
617
618static int mp_bus_to_node[BUS_NR] = {
619 [0 ... BUS_NR - 1] = -1
620};
621
622void set_mp_bus_to_node(int busnum, int node)
623{
624 if (busnum >= 0 && busnum < BUS_NR)
625 mp_bus_to_node[busnum] = node;
626}
627
628int get_mp_bus_to_node(int busnum)
629{
630 int node = -1;
631
632 if (busnum < 0 || busnum > (BUS_NR - 1))
633 return node;
634
635 node = mp_bus_to_node[busnum];
636
637 /*
638 * let numa_node_id to decide it later in dma_alloc_pages
639 * if there is no ram on that node
640 */
641 if (node != -1 && !node_online(node))
642 node = -1;
643
644 return node;
645}
646
647#else /* CONFIG_X86_32 */
648
649static unsigned char mp_bus_to_node[BUS_NR] = {
650 [0 ... BUS_NR - 1] = -1
651};
652
653void set_mp_bus_to_node(int busnum, int node)
654{
655 if (busnum >= 0 && busnum < BUS_NR)
656 mp_bus_to_node[busnum] = (unsigned char) node;
657}
658
659int get_mp_bus_to_node(int busnum)
660{
661 int node;
662
663 if (busnum < 0 || busnum > (BUS_NR - 1))
664 return 0;
665 node = mp_bus_to_node[busnum];
666 return node;
667}
668
669#endif /* CONFIG_X86_32 */
670
671#endif /* CONFIG_NUMA */