aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2005-09-12 12:49:24 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-12 13:49:56 -0400
commit3f098c2605bdf50176b26f4fa724e9b9c99e5242 (patch)
treed4632bc555c52afed6ae4c0444659dca49e3e406 /arch/x86_64
parentb91691164be174b780f5c1bb145a7ab5d33fce2f (diff)
[PATCH] x86-64: Support dualcore and 8 socket systems in k8 fallback node parsing
This applies in particular to systems where the local APIC space and node space are very different from the Linux CPU number space. Previously the older NUMA setup code, which directly parses the K8 northbridge registers, had some issues on 8 socket or dual core systems. This patch fixes them. This is mainly done by fixing some confusion between Linux CPU numbers and local APIC IDs. We now pass the local APIC IDs to later code, which avoids mismatches. Also add some heuristics to detect cases where the HyperTransport node IDs and the local APIC IDs don't match, but are shifted by a constant offset. This is still all quite hackish; hopefully BIOS writers fill in correct SRATs instead. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/kernel/setup.c64
-rw-r--r--arch/x86_64/mm/k8topology.c13
-rw-r--r--arch/x86_64/mm/numa.c10
3 files changed, 62 insertions, 25 deletions
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 976ebcf96f3e..df55a63f8866 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
755 } 755 }
756} 756}
757 757
758#ifdef CONFIG_NUMA
759static int nearby_node(int apicid)
760{
761 int i;
762 for (i = apicid - 1; i >= 0; i--) {
763 int node = apicid_to_node[i];
764 if (node != NUMA_NO_NODE && node_online(node))
765 return node;
766 }
767 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
768 int node = apicid_to_node[i];
769 if (node != NUMA_NO_NODE && node_online(node))
770 return node;
771 }
772 return first_node(node_online_map); /* Shouldn't happen */
773}
774#endif
775
758/* 776/*
759 * On a AMD dual core setup the lower bits of the APIC id distingush the cores. 777 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
760 * Assumes number of cores is a power of two. 778 * Assumes number of cores is a power of two.
@@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
763{ 781{
764#ifdef CONFIG_SMP 782#ifdef CONFIG_SMP
765 int cpu = smp_processor_id(); 783 int cpu = smp_processor_id();
766 int node = 0;
767 unsigned bits; 784 unsigned bits;
785#ifdef CONFIG_NUMA
786 int node = 0;
768 unsigned apicid = phys_proc_id[cpu]; 787 unsigned apicid = phys_proc_id[cpu];
788#endif
769 789
770 bits = 0; 790 bits = 0;
771 while ((1 << bits) < c->x86_num_cores) 791 while ((1 << bits) < c->x86_num_cores)
@@ -777,24 +797,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
777 phys_proc_id[cpu] >>= bits; 797 phys_proc_id[cpu] >>= bits;
778 798
779#ifdef CONFIG_NUMA 799#ifdef CONFIG_NUMA
780 /* When an ACPI SRAT table is available use the mappings from SRAT 800 node = phys_proc_id[cpu];
781 instead. */ 801 if (apicid_to_node[apicid] != NUMA_NO_NODE)
782 node = phys_proc_id[cpu]; 802 node = apicid_to_node[apicid];
783 if (acpi_numa > 0) { 803 if (!node_online(node)) {
784 if (apicid_to_node[apicid] != NUMA_NO_NODE) 804 /* Two possibilities here:
785 node = apicid_to_node[apicid]; 805 - The CPU is missing memory and no node was created.
786 else 806 In that case try picking one from a nearby CPU
787 printk(KERN_ERR 807 - The APIC IDs differ from the HyperTransport node IDs
788 "SRAT: Didn't specify node for CPU %d(%d)\n", 808 which the K8 northbridge parsing fills in.
789 cpu, apicid); 809 Assume they are all increased by a constant offset,
790 } 810 but in the same order as the HT nodeids.
791 if (!node_online(node)) 811 If that doesn't result in a usable node fall back to the
792 node = first_node(node_online_map); 812 path for the previous case. */
793 cpu_to_node[cpu] = node; 813 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
814 if (ht_nodeid >= 0 &&
815 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
816 node = apicid_to_node[ht_nodeid];
817 /* Pick a nearby node */
818 if (!node_online(node))
819 node = nearby_node(apicid);
820 }
821 cpu_to_node[cpu] = node;
822
823 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
824 cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
794#endif 825#endif
795
796 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
797 cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
798#endif 826#endif
799} 827}
800 828
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index ec35747aacd7..65417b040c1b 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
45 unsigned long prevbase; 45 unsigned long prevbase;
46 struct node nodes[8]; 46 struct node nodes[8];
47 int nodeid, i, nb; 47 int nodeid, i, nb;
48 unsigned char nodeids[8];
48 int found = 0; 49 int found = 0;
49 u32 reg; 50 u32 reg;
50 unsigned numnodes; 51 unsigned numnodes;
51 nodemask_t nodes_parsed; 52 nodemask_t nodes_parsed;
53 unsigned dualcore = 0;
52 54
53 nodes_clear(nodes_parsed); 55 nodes_clear(nodes_parsed);
54 56
@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
67 prevbase = 0; 69 prevbase = 0;
68 for (i = 0; i < 8; i++) { 70 for (i = 0; i < 8; i++) {
69 unsigned long base,limit; 71 unsigned long base,limit;
70 72 u32 nodeid;
73
74 /* Undefined before E stepping, but hopefully 0 */
75 dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
71 base = read_pci_config(0, nb, 1, 0x40 + i*8); 76 base = read_pci_config(0, nb, 1, 0x40 + i*8);
72 limit = read_pci_config(0, nb, 1, 0x44 + i*8); 77 limit = read_pci_config(0, nb, 1, 0x44 + i*8);
73 78
74 nodeid = limit & 7; 79 nodeid = limit & 7;
80 nodeids[i] = nodeid;
75 if ((base & 3) == 0) { 81 if ((base & 3) == 0) {
76 if (i < numnodes) 82 if (i < numnodes)
77 printk("Skipping disabled node %d\n", i); 83 printk("Skipping disabled node %d\n", i);
@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
157 163
158 for (i = 0; i < 8; i++) { 164 for (i = 0; i < 8; i++) {
159 if (nodes[i].start != nodes[i].end) { 165 if (nodes[i].start != nodes[i].end) {
160 /* assume 1:1 NODE:CPU */ 166 nodeid = nodeids[i];
161 cpu_to_node[i] = i; 167 apicid_to_node[nodeid << dualcore] = i;
168 apicid_to_node[(nodeid << dualcore) + dualcore] = i;
162 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 169 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
163 } 170 }
164 } 171 }
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 5b1518629893..80a49d9bd8a7 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -28,11 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
28int memnode_shift; 28int memnode_shift;
29u8 memnodemap[NODEMAPSIZE]; 29u8 memnodemap[NODEMAPSIZE];
30 30
31unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; 31unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
32unsigned char apicid_to_node[256] __cpuinitdata = { 32 [0 ... NR_CPUS-1] = NUMA_NO_NODE
33 [0 ... NR_CPUS-1] = NUMA_NO_NODE
34}; 33};
35cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; 34unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
35 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
36};
37cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
36 38
37int numa_off __initdata; 39int numa_off __initdata;
38 40