Diffstat (limited to 'arch/x86/mm/numa.c')

-rw-r--r--	arch/x86/mm/numa.c	212
1 file changed, 209 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a3..9559d360fde 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
 early_param("numa", numa_setup);
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
 /*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	/* early setting, no percpu area yet */
+	if (cpu_to_node_map) {
+		cpu_to_node_map[cpu] = node;
+		return;
+	}
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+		dump_stack();
+		return;
+	}
+#endif
+	per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	if (node != NUMA_NO_NODE)
+		set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
  *
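Note on the hunk above: DEFINE_EARLY_PER_CPU gives x86_cpu_to_node_map two backing stores, a boot-time array reached through early_per_cpu_ptr() and the real per-cpu variable that only exists once the per-cpu areas are set up, which is why numa_set_node() writes to whichever store is currently live. The userspace sketch below models just that two-phase write; the model_* names and the copy-then-switch setup step are illustrative assumptions, not the kernel's actual macro expansion.

/*
 * Userspace model of the two-phase cpu->node write (not kernel code):
 * before the "per-cpu areas" exist, stores go to a boot-time array via
 * early_ptr; once the real storage is live, early_ptr is cleared and
 * stores go to the permanent map.  All names are illustrative only.
 */
#include <stdio.h>

#define NR_CPUS      8
#define NUMA_NO_NODE (-1)

static int early_map[NR_CPUS] = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
static int *early_ptr = early_map;	/* cleared once per-cpu is up */
static int percpu_map[NR_CPUS];		/* stands in for per_cpu() */

static void model_numa_set_node(int cpu, int node)
{
	if (early_ptr) {		/* early boot: no per-cpu area yet */
		early_ptr[cpu] = node;
		return;
	}
	percpu_map[cpu] = node;		/* normal path */
}

static void model_setup_per_cpu_areas(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		percpu_map[cpu] = early_ptr[cpu];	/* copy early values */
	early_ptr = NULL;		/* switch over to the real storage */
}

int main(void)
{
	model_numa_set_node(0, 1);	/* lands in early_map */
	model_setup_per_cpu_areas();
	model_numa_set_node(1, 0);	/* lands in percpu_map */
	printf("cpu0->node %d, cpu1->node %d\n", percpu_map[0], percpu_map[1]);
	return 0;
}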
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif	/* !CONFIG_NUMA_EMU */
+
+#else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+int __cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!cpu_possible(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	char buf[64];
+
+	if (node == NUMA_NO_NODE) {
+		/* early_cpu_to_node() already emits a warning and trace */
+		return NULL;
+	}
+	mask = node_to_cpumask_map[node];
+	if (!mask) {
+		pr_err("node_to_cpumask_map[%i] NULL\n", node);
+		dump_stack();
+		return NULL;
+	}
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu",
+		cpu, node, buf);
+	return mask;
+}
+
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	struct cpumask *mask;
+
+	mask = debug_cpumask_set_cpu(cpu, enable);
+	if (!mask)
+		return;
+
+	if (enable)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_clear_cpu(cpu, mask);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+# endif	/* !CONFIG_NUMA_EMU */
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
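init_cpu_to_node() above falls back to find_near_online_node() when firmware places a CPU on a node that never came online (e.g. a memoryless node). Below is a standalone sketch of that nearest-node selection; the SLIT-style distance table and the online[] array are invented stand-ins for node_distance() and for_each_online_node().

/*
 * Userspace sketch of the nearest-online-node fallback (not kernel
 * code).  Distances follow ACPI SLIT convention: 10 = local node,
 * larger = farther away.  All values here are made up.
 */
#include <stdio.h>
#include <limits.h>

#define MAX_NODES 4

static const int distance[MAX_NODES][MAX_NODES] = {
	{ 10, 20, 40, 40 },
	{ 20, 10, 40, 40 },
	{ 40, 40, 10, 20 },
	{ 40, 40, 20, 10 },
};
static const int online[MAX_NODES] = { 1, 1, 0, 1 };	/* node 2 offline */

static int find_near_online_node(int node)
{
	int best_node = -1, min_val = INT_MAX;

	for (int n = 0; n < MAX_NODES; n++) {
		if (!online[n])			/* for_each_online_node() */
			continue;
		if (distance[node][n] < min_val) {
			min_val = distance[node][n];
			best_node = n;
		}
	}
	return best_node;
}

int main(void)
{
	/* CPUs on offline node 2 fall back to its nearest neighbour, 3. */
	printf("nearest online to node 2: %d\n", find_near_online_node(2));
	return 0;
}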
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
 	return node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
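A final note on numa_init_array() from the second hunk: CPUs that firmware left unmapped are spread round robin across the online nodes, so every possible CPU has some node before the per-cpu areas are built. A minimal standalone model of that loop follows; the online_nodes[] list and the initial cpu_to_node[] contents are invented for illustration, with the modulo step standing in for the next_node()/first_node() wraparound.

/*
 * Userspace model of the round-robin fill (not kernel code): CPUs
 * with a firmware-provided node keep it; the rest are dealt out over
 * the online nodes in turn.
 */
#include <stdio.h>

#define NR_CPUS      8
#define NUMA_NO_NODE (-1)

static const int online_nodes[] = { 0, 2 };	/* pretend nodes 0, 2 online */
static const int nr_online = 2;

int main(void)
{
	/* CPUs 0 and 5 already mapped by firmware; the rest are unset. */
	int cpu_to_node[NR_CPUS] = {
		0, NUMA_NO_NODE, NUMA_NO_NODE, NUMA_NO_NODE,
		NUMA_NO_NODE, 2, NUMA_NO_NODE, NUMA_NO_NODE,
	};
	int rr = 0;	/* index into online_nodes, as first_node() result */

	for (int i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node[i] != NUMA_NO_NODE)
			continue;			/* keep firmware mapping */
		cpu_to_node[i] = online_nodes[rr];
		rr = (rr + 1) % nr_online;		/* next_node() with wrap */
	}

	for (int i = 0; i < NR_CPUS; i++)
		printf("cpu%d -> node %d\n", i, cpu_to_node[i]);
	return 0;
}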
