Diffstat (limited to 'arch/x86_64/mm/numa.c')
 arch/x86_64/mm/numa.c | 122 ++++++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 49 deletions(-)
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 214803821001..a828a01739cc 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
 
 int numa_off __initdata;
 
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns:
+ * 1 if OK
+ * 0 if memnodemap[] too small (or shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(
+	const struct node *nodes, int numnodes, int shift)
 {
 	int i;
-	int shift = 20;
-	unsigned long addr,maxend=0;
-
-	for (i = 0; i < numnodes; i++)
-		if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
-				maxend = nodes[i].end;
+	int res = -1;
+	unsigned long addr, end;
 
-	while ((1UL << shift) < (maxend / NODEMAPSIZE))
-		shift++;
-
-	printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
-			shift,maxend);
-	memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
+	memset(memnodemap, 0xff, sizeof(memnodemap));
 	for (i = 0; i < numnodes; i++) {
-		if (nodes[i].start == nodes[i].end)
+		addr = nodes[i].start;
+		end = nodes[i].end;
+		if (addr >= end)
 			continue;
-		for (addr = nodes[i].start;
-		     addr < nodes[i].end;
-		     addr += (1UL << shift)) {
-			if (memnodemap[addr >> shift] != 0xff) {
-				printk(KERN_INFO
-			"Your memory is not aligned you need to rebuild your kernel "
-			"with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
-					shift,addr);
+		if ((end >> shift) >= NODEMAPSIZE)
+			return 0;
+		do {
+			if (memnodemap[addr >> shift] != 0xff)
 				return -1;
-			}
 			memnodemap[addr >> shift] = i;
-		}
+			addr += (1 << shift);
+		} while (addr < end);
+		res = 1;
 	}
+	return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+	int shift = 20;
+
+	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+		shift++;
+
+	printk(KERN_DEBUG "Using %d for the hash shift.\n",
+		shift);
+
+	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+		printk(KERN_INFO
+	"Your memory is not aligned, you need to rebuild your kernel "
+	"with a bigger NODEMAPSIZE shift=%d\n",
+			shift);
+		return -1;
+	}
 	return shift;
 }
 
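The rework above turns a one-shot heuristic into a search: populate_memnodemap() reports whether a given shift yields a collision-free table (1), overflows the table (0), or makes two nodes share a slot (-1), and compute_hash_shift() grows the shift until one more step would collide. What follows is a minimal standalone sketch of the same technique for experimenting outside the kernel; the NODEMAPSIZE value, the userspace harness, and the sample node layout are invented for illustration and are not part of the patch.

/* Standalone sketch of the memnodemap technique: find the largest shift
 * at which (addr >> shift) maps each populated slot to exactly one node.
 * NODEMAPSIZE and the sample nodes below are illustrative only. */
#include <stdio.h>
#include <string.h>

#define NODEMAPSIZE 256

struct node { unsigned long start, end; };

static unsigned char memnodemap[NODEMAPSIZE];

/* Same contract as the kernel helper: 1 OK, 0 table too small, -1 overlap. */
static int populate_memnodemap(const struct node *nodes, int numnodes, int shift)
{
	unsigned long addr, end;
	int i, res = -1;

	memset(memnodemap, 0xff, sizeof(memnodemap));
	for (i = 0; i < numnodes; i++) {
		addr = nodes[i].start;
		end = nodes[i].end;
		if (addr >= end)
			continue;
		if ((end >> shift) >= NODEMAPSIZE)
			return 0;
		do {
			if (memnodemap[addr >> shift] != 0xff)
				return -1;	/* two nodes hash to one slot */
			memnodemap[addr >> shift] = i;
			addr += (1UL << shift);
		} while (addr < end);
		res = 1;
	}
	return res;
}

static int compute_hash_shift(const struct node *nodes, int numnodes)
{
	int shift = 20;

	/* Coarsen while a bigger shift still avoids slot collisions... */
	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
		shift++;
	/* ...then the chosen shift itself must fully succeed. */
	if (populate_memnodemap(nodes, numnodes, shift) != 1)
		return -1;
	return shift;
}

int main(void)
{
	/* Two fake 1GB nodes; addresses are illustrative, not real e820 data. */
	struct node nodes[] = {
		{ 0x00000000UL, 0x40000000UL },
		{ 0x40000000UL, 0x80000000UL },
	};

	printf("shift %d\n", compute_hash_shift(nodes, 2));
	return 0;
}

On this made-up layout the sketch prints "shift 30", i.e. each map slot covers 1GB, the coarsest granularity at which the two nodes still land in distinct slots.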
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
 
-	memory_present(nodeid, start_pfn, end_pfn);
 	nodedata_phys = find_e820_area(start, end, pgdat_size);
 	if (nodedata_phys == -1L)
 		panic("Cannot find memory pgdat in node %d\n", nodeid);
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid)
 	unsigned long start_pfn, end_pfn;
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
-	unsigned long dma_end_pfn;
 
-	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
-	memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
+	start_pfn = node_start_pfn(nodeid);
+	end_pfn = node_end_pfn(nodeid);
 
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
+	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
+		nodeid, start_pfn, end_pfn);
 
-	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-
-	/* All nodes > 0 have a zero length zone DMA */
-	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	if (start_pfn < dma_end_pfn) {
-		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
-		holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
-		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
-		holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
-
-	} else {
-		zones[ZONE_NORMAL] = end_pfn - start_pfn;
-		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
-	}
-
+	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
 }
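size_zones() is not defined in this file; the call replaces the open-coded DMA/NORMAL split deleted above, so its job can be read off those lines. Below is a hedged, standalone reconstruction of what such a helper computes; the DMA_END_PFN constant, the stubbed e820_hole_size(), and the trimmed zone enum are stand-ins for the kernel's __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT, the real e820 walk, and the zone list from mm.h, and the real helper elsewhere in the patch may differ.

#include <stdio.h>
#include <string.h>

enum { ZONE_DMA, ZONE_NORMAL, MAX_NR_ZONES };

/* Stand-in for __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT: 16MB of 4K pages. */
#define DMA_END_PFN 4096UL

/* Stand-in for the kernel's e820 walk: pretend the range has no holes. */
static unsigned long e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
{
	(void)start_pfn;
	(void)end_pfn;
	return 0;
}

static void size_zones(unsigned long *zones, unsigned long *holes,
		       unsigned long start_pfn, unsigned long end_pfn)
{
	memset(zones, 0, MAX_NR_ZONES * sizeof(*zones));
	memset(holes, 0, MAX_NR_ZONES * sizeof(*holes));

	if (start_pfn < DMA_END_PFN) {
		/* The node straddles the 16MB DMA boundary (typically node 0). */
		zones[ZONE_DMA] = DMA_END_PFN - start_pfn;
		holes[ZONE_DMA] = e820_hole_size(start_pfn, DMA_END_PFN);
		zones[ZONE_NORMAL] = end_pfn - DMA_END_PFN;
		holes[ZONE_NORMAL] = e820_hole_size(DMA_END_PFN, end_pfn);
	} else {
		/* Nodes entirely above 16MB get a zero-length DMA zone. */
		zones[ZONE_NORMAL] = end_pfn - start_pfn;
		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
	}
}

int main(void)
{
	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];

	size_zones(zones, holes, 0, 262144);	/* a node covering the first 1GB */
	printf("DMA %lu pages, NORMAL %lu pages\n",
	       zones[ZONE_DMA], zones[ZONE_NORMAL]);
	return 0;
}

Run on a node spanning the first 1GB, this reports 4096 DMA pages and 258048 NORMAL pages, matching the deleted start_pfn < dma_end_pfn branch.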
@@ -171,7 +174,7 @@ void __init numa_init_array(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_to_node[i] != NUMA_NO_NODE)
 			continue;
-		cpu_to_node[i] = rr;
+		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
@@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 		if (i == numa_fake-1)
 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
 		nodes[i].end = nodes[i].start + sz;
-		if (i != numa_fake-1)
-			nodes[i].end--;
 		printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
 		       i,
 		       nodes[i].start, nodes[i].end,
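Note the semantic change in this hunk: the deleted lines shaved one byte off every fake node but the last, making end addresses inclusive, while the reworked populate_memnodemap() walks half-open [start, end) ranges, which appears to be why the adjustment was dropped.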
@@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	nodes_clear(node_online_map);
 	node_set_online(0);
 	for (i = 0; i < NR_CPUS; i++)
-		cpu_to_node[i] = 0;
+		numa_set_node(i, 0);
 	node_to_cpumask[0] = cpumask_of_cpu(0);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
@@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu)
 	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 }
 
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	cpu_pda[cpu].nodenumber = node;
+	cpu_to_node[cpu] = node;
+}
+
 unsigned long __init numa_free_all_bootmem(void)
 {
 	int i;
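The helper added here centralizes bookkeeping that numa_init_array() and numa_initmem_init() did by hand in the earlier hunks: besides updating cpu_to_node[], numa_set_node() mirrors the node number into the CPU's PDA (cpu_pda[cpu].nodenumber), keeping both records in sync wherever a CPU is bound to a node.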
@@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
+#ifdef CONFIG_SPARSEMEM
+static void __init arch_sparse_init(void)
+{
+	int i;
+
+	for_each_online_node(i)
+		memory_present(i, node_start_pfn(i), node_end_pfn(i));
+
+	sparse_init();
+}
+#else
+#define arch_sparse_init() do {} while (0)
+#endif
+
 void __init paging_init(void)
 {
 	int i;
+
+	arch_sparse_init();
+
 	for_each_online_node(i) {
 		setup_node_zones(i);
 	}
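This last hunk pairs with the earlier removal of memory_present() from setup_node_bootmem(): instead of registering each node's range piecemeal during bootmem setup, arch_sparse_init() makes a single memory_present() pass over every online node and then calls sparse_init(), and paging_init() runs it before setting up the per-node zones. With CONFIG_SPARSEMEM disabled the hook compiles away to a no-op.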