diff options
Diffstat (limited to 'arch/x86_64/mm/numa.c')
-rw-r--r-- | arch/x86_64/mm/numa.c | 122 |
1 files changed, 73 insertions, 49 deletions
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 214803821001..a828a01739cc 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | |||
38 | 38 | ||
39 | int numa_off __initdata; | 39 | int numa_off __initdata; |
40 | 40 | ||
41 | int __init compute_hash_shift(struct node *nodes, int numnodes) | 41 | |
42 | /* | ||
43 | * Given a shift value, try to populate memnodemap[] | ||
44 | * Returns : | ||
45 | * 1 if OK | ||
45 | * 0 if memnodemap[] too small (or shift too small) | |
47 | * -1 if node overlap or lost ram (shift too big) | ||
48 | */ | ||
49 | static int __init populate_memnodemap( | ||
50 | const struct node *nodes, int numnodes, int shift) | ||
42 | { | 51 | { |
43 | int i; | 52 | int i; |
44 | int shift = 20; | 53 | int res = -1; |
45 | unsigned long addr,maxend=0; | 54 | unsigned long addr, end; |
46 | |||
47 | for (i = 0; i < numnodes; i++) | ||
48 | if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) | ||
49 | maxend = nodes[i].end; | ||
50 | 55 | ||
51 | while ((1UL << shift) < (maxend / NODEMAPSIZE)) | 56 | memset(memnodemap, 0xff, sizeof(memnodemap)); |
52 | shift++; | ||
53 | |||
54 | printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", | ||
55 | shift,maxend); | ||
56 | memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); | ||
57 | for (i = 0; i < numnodes; i++) { | 57 | for (i = 0; i < numnodes; i++) { |
58 | if (nodes[i].start == nodes[i].end) | 58 | addr = nodes[i].start; |
59 | end = nodes[i].end; | ||
60 | if (addr >= end) | ||
59 | continue; | 61 | continue; |
60 | for (addr = nodes[i].start; | 62 | if ((end >> shift) >= NODEMAPSIZE) |
61 | addr < nodes[i].end; | 63 | return 0; |
62 | addr += (1UL << shift)) { | 64 | do { |
63 | if (memnodemap[addr >> shift] != 0xff) { | 65 | if (memnodemap[addr >> shift] != 0xff) |
64 | printk(KERN_INFO | ||
65 | "Your memory is not aligned you need to rebuild your kernel " | ||
66 | "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", | ||
67 | shift,addr); | ||
68 | return -1; | 66 | return -1; |
69 | } | ||
70 | memnodemap[addr >> shift] = i; | 67 | memnodemap[addr >> shift] = i; |
71 | } | 68 | addr += (1 << shift); |
69 | } while (addr < end); | ||
70 | res = 1; | ||
72 | } | 71 | } |
72 | return res; | ||
73 | } | ||
74 | |||
75 | int __init compute_hash_shift(struct node *nodes, int numnodes) | ||
76 | { | ||
77 | int shift = 20; | ||
78 | |||
79 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | ||
80 | shift++; | ||
81 | |||
82 | printk(KERN_DEBUG "Using %d for the hash shift.\n", | ||
83 | shift); | ||
84 | |||
85 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { | ||
86 | printk(KERN_INFO | ||
87 | "Your memory is not aligned you need to rebuild your kernel " | ||
88 | "with a bigger NODEMAPSIZE shift=%d\n", | ||
89 | shift); | ||
90 | return -1; | ||
91 | } | ||
73 | return shift; | 92 | return shift; |
74 | } | 93 | } |
75 | 94 | ||
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
94 | start_pfn = start >> PAGE_SHIFT; | 113 | start_pfn = start >> PAGE_SHIFT; |
95 | end_pfn = end >> PAGE_SHIFT; | 114 | end_pfn = end >> PAGE_SHIFT; |
96 | 115 | ||
97 | memory_present(nodeid, start_pfn, end_pfn); | ||
98 | nodedata_phys = find_e820_area(start, end, pgdat_size); | 116 | nodedata_phys = find_e820_area(start, end, pgdat_size); |
99 | if (nodedata_phys == -1L) | 117 | if (nodedata_phys == -1L) |
100 | panic("Cannot find memory pgdat in node %d\n", nodeid); | 118 | panic("Cannot find memory pgdat in node %d\n", nodeid); |
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid) | |||
132 | unsigned long start_pfn, end_pfn; | 150 | unsigned long start_pfn, end_pfn; |
133 | unsigned long zones[MAX_NR_ZONES]; | 151 | unsigned long zones[MAX_NR_ZONES]; |
134 | unsigned long holes[MAX_NR_ZONES]; | 152 | unsigned long holes[MAX_NR_ZONES]; |
135 | unsigned long dma_end_pfn; | ||
136 | 153 | ||
137 | memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 154 | start_pfn = node_start_pfn(nodeid); |
138 | memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 155 | end_pfn = node_end_pfn(nodeid); |
139 | 156 | ||
140 | start_pfn = node_start_pfn(nodeid); | 157 | Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", |
141 | end_pfn = node_end_pfn(nodeid); | 158 | nodeid, start_pfn, end_pfn); |
142 | 159 | ||
143 | Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); | 160 | size_zones(zones, holes, start_pfn, end_pfn); |
144 | |||
145 | /* All nodes > 0 have a zero length zone DMA */ | ||
146 | dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
147 | if (start_pfn < dma_end_pfn) { | ||
148 | zones[ZONE_DMA] = dma_end_pfn - start_pfn; | ||
149 | holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); | ||
150 | zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; | ||
151 | holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); | ||
152 | |||
153 | } else { | ||
154 | zones[ZONE_NORMAL] = end_pfn - start_pfn; | ||
155 | holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); | ||
156 | } | ||
157 | |||
158 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, | 161 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, |
159 | start_pfn, holes); | 162 | start_pfn, holes); |
160 | } | 163 | } |
@@ -171,7 +174,7 @@ void __init numa_init_array(void) | |||
171 | for (i = 0; i < NR_CPUS; i++) { | 174 | for (i = 0; i < NR_CPUS; i++) { |
172 | if (cpu_to_node[i] != NUMA_NO_NODE) | 175 | if (cpu_to_node[i] != NUMA_NO_NODE) |
173 | continue; | 176 | continue; |
174 | cpu_to_node[i] = rr; | 177 | numa_set_node(i, rr); |
175 | rr = next_node(rr, node_online_map); | 178 | rr = next_node(rr, node_online_map); |
176 | if (rr == MAX_NUMNODES) | 179 | if (rr == MAX_NUMNODES) |
177 | rr = first_node(node_online_map); | 180 | rr = first_node(node_online_map); |
@@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
205 | if (i == numa_fake-1) | 208 | if (i == numa_fake-1) |
206 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; | 209 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; |
207 | nodes[i].end = nodes[i].start + sz; | 210 | nodes[i].end = nodes[i].start + sz; |
208 | if (i != numa_fake-1) | ||
209 | nodes[i].end--; | ||
210 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 211 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", |
211 | i, | 212 | i, |
212 | nodes[i].start, nodes[i].end, | 213 | nodes[i].start, nodes[i].end, |
@@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
257 | nodes_clear(node_online_map); | 258 | nodes_clear(node_online_map); |
258 | node_set_online(0); | 259 | node_set_online(0); |
259 | for (i = 0; i < NR_CPUS; i++) | 260 | for (i = 0; i < NR_CPUS; i++) |
260 | cpu_to_node[i] = 0; | 261 | numa_set_node(i, 0); |
261 | node_to_cpumask[0] = cpumask_of_cpu(0); | 262 | node_to_cpumask[0] = cpumask_of_cpu(0); |
262 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | 263 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); |
263 | } | 264 | } |
@@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu) | |||
267 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); | 268 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); |
268 | } | 269 | } |
269 | 270 | ||
271 | void __cpuinit numa_set_node(int cpu, int node) | ||
272 | { | ||
273 | cpu_pda[cpu].nodenumber = node; | ||
274 | cpu_to_node[cpu] = node; | ||
275 | } | ||
276 | |||
270 | unsigned long __init numa_free_all_bootmem(void) | 277 | unsigned long __init numa_free_all_bootmem(void) |
271 | { | 278 | { |
272 | int i; | 279 | int i; |
@@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void) | |||
277 | return pages; | 284 | return pages; |
278 | } | 285 | } |
279 | 286 | ||
287 | #ifdef CONFIG_SPARSEMEM | ||
288 | static void __init arch_sparse_init(void) | ||
289 | { | ||
290 | int i; | ||
291 | |||
292 | for_each_online_node(i) | ||
293 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); | ||
294 | |||
295 | sparse_init(); | ||
296 | } | ||
297 | #else | ||
298 | #define arch_sparse_init() do {} while (0) | ||
299 | #endif | ||
300 | |||
280 | void __init paging_init(void) | 301 | void __init paging_init(void) |
281 | { | 302 | { |
282 | int i; | 303 | int i; |
304 | |||
305 | arch_sparse_init(); | ||
306 | |||
283 | for_each_online_node(i) { | 307 | for_each_online_node(i) { |
284 | setup_node_zones(i); | 308 | setup_node_zones(i); |
285 | } | 309 | } |