Diffstat (limited to 'arch/x86_64/mm/numa.c')
-rw-r--r--	arch/x86_64/mm/numa.c | 122
1 file changed, 73 insertions(+), 49 deletions(-)
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 214803821001..a828a01739cc 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
 
 int numa_off __initdata;
 
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(
+	const struct node *nodes, int numnodes, int shift)
 {
 	int i;
-	int shift = 20;
-	unsigned long addr,maxend=0;
-
-	for (i = 0; i < numnodes; i++)
-		if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
-				maxend = nodes[i].end;
+	int res = -1;
+	unsigned long addr, end;
 
-	while ((1UL << shift) < (maxend / NODEMAPSIZE))
-		shift++;
-
-	printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
-			shift,maxend);
-	memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
+	memset(memnodemap, 0xff, sizeof(memnodemap));
 	for (i = 0; i < numnodes; i++) {
-		if (nodes[i].start == nodes[i].end)
+		addr = nodes[i].start;
+		end = nodes[i].end;
+		if (addr >= end)
 			continue;
-		for (addr = nodes[i].start;
-		     addr < nodes[i].end;
-		     addr += (1UL << shift)) {
-			if (memnodemap[addr >> shift] != 0xff) {
-				printk(KERN_INFO
-	"Your memory is not aligned you need to rebuild your kernel "
-	"with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
-					shift,addr);
+		if ((end >> shift) >= NODEMAPSIZE)
+			return 0;
+		do {
+			if (memnodemap[addr >> shift] != 0xff)
 				return -1;
-			}
 			memnodemap[addr >> shift] = i;
-		}
+			addr += (1 << shift);
+		} while (addr < end);
+		res = 1;
 	}
+	return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+	int shift = 20;
+
+	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+		shift++;
+
+	printk(KERN_DEBUG "Using %d for the hash shift.\n",
+		shift);
+
+	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+		printk(KERN_INFO
+	"Your memory is not aligned you need to rebuild your kernel "
+	"with a bigger NODEMAPSIZE shift=%d\n",
+			shift);
+		return -1;
+	}
 	return shift;
 }
 
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
 
-	memory_present(nodeid, start_pfn, end_pfn);
 	nodedata_phys = find_e820_area(start, end, pgdat_size);
 	if (nodedata_phys == -1L)
 		panic("Cannot find memory pgdat in node %d\n", nodeid);
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid)
 	unsigned long start_pfn, end_pfn;
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
-	unsigned long dma_end_pfn;
 
-	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
-	memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
+	start_pfn = node_start_pfn(nodeid);
+	end_pfn = node_end_pfn(nodeid);
 
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
+	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
+		nodeid, start_pfn, end_pfn);
 
-	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-
-	/* All nodes > 0 have a zero length zone DMA */
-	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	if (start_pfn < dma_end_pfn) {
-		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
-		holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
-		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
-		holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
-
-	} else {
-		zones[ZONE_NORMAL] = end_pfn - start_pfn;
-		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
-	}
-
+	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
 }
@@ -171,7 +174,7 @@ void __init numa_init_array(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_to_node[i] != NUMA_NO_NODE)
 			continue;
-		cpu_to_node[i] = rr;
+		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
@@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 		if (i == numa_fake-1)
 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
 		nodes[i].end = nodes[i].start + sz;
-		if (i != numa_fake-1)
-			nodes[i].end--;
 		printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
 		       i,
 		       nodes[i].start, nodes[i].end,
@@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	nodes_clear(node_online_map);
 	node_set_online(0);
 	for (i = 0; i < NR_CPUS; i++)
-		cpu_to_node[i] = 0;
+		numa_set_node(i, 0);
 	node_to_cpumask[0] = cpumask_of_cpu(0);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
@@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu)
 	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 }
 
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	cpu_pda[cpu].nodenumber = node;
+	cpu_to_node[cpu] = node;
+}
+
 unsigned long __init numa_free_all_bootmem(void)
 {
 	int i;
@@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
+#ifdef CONFIG_SPARSEMEM
+static void __init arch_sparse_init(void)
+{
+	int i;
+
+	for_each_online_node(i)
+		memory_present(i, node_start_pfn(i), node_end_pfn(i));
+
+	sparse_init();
+}
+#else
+#define arch_sparse_init() do {} while (0)
+#endif
+
 void __init paging_init(void)
 {
 	int i;
+
+	arch_sparse_init();
+
 	for_each_online_node(i) {
 		setup_node_zones(i);
 	}
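
Note: the memnodemap[] table that populate_memnodemap() fills above backs a constant-time physical-address-to-node lookup elsewhere in the x86_64 NUMA code; compute_hash_shift() searches for the largest (coarsest) shift that still keeps every node's range distinguishable and within NODEMAPSIZE slots. The user-space sketch below is illustrative only and not part of this commit: the names memnode_shift/memnodemap mirror the kernel's, but the harness, table size, and node layout are assumed for the demo.

/*
 * Illustrative sketch (not part of the patch): how a populated
 * memnodemap[] is consumed for address-to-node lookup.
 */
#include <stdio.h>
#include <string.h>

#define NODEMAPSIZE 0x100			/* assumed table size for the demo */

static unsigned char memnodemap[NODEMAPSIZE];
static int memnode_shift = 24;			/* as if chosen by compute_hash_shift() */

/* Constant-time lookup: one shift, one table read. */
static int phys_to_nid(unsigned long addr)
{
	return memnodemap[addr >> memnode_shift];
}

int main(void)
{
	unsigned long addr;

	memset(memnodemap, 0xff, sizeof(memnodemap));

	/* Pretend node 0 owns [0, 1GB) and node 1 owns [1GB, 2GB). */
	for (addr = 0; addr < (1UL << 30); addr += 1UL << memnode_shift)
		memnodemap[addr >> memnode_shift] = 0;
	for (addr = 1UL << 30; addr < (2UL << 30); addr += 1UL << memnode_shift)
		memnodemap[addr >> memnode_shift] = 1;

	printf("0x2000000  -> node %d\n", phys_to_nid(0x2000000UL));
	printf("0x48000000 -> node %d\n", phys_to_nid(0x48000000UL));
	return 0;
}

In this setup a 24-bit shift gives 16MB granularity, so 2GB of memory needs 128 table entries, comfortably under the assumed NODEMAPSIZE; a coarser shift would need fewer entries but risks mapping two nodes into the same slot, which is exactly the overlap case populate_memnodemap() reports with -1.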
