Diffstat (limited to 'arch/x86/mm/numa_32.c')
-rw-r--r--  arch/x86/mm/numa_32.c | 398
1 file changed, 102 insertions, 296 deletions

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420df..849a975d3fa0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,39 +22,11 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/nodemask.h>
 #include <linux/module.h>
-#include <linux/kexec.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/mmzone.h>
-#include <asm/bios_ebda.h>
-#include <asm/proto.h>
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-/*
- * numa interface - we expect the numa architecture specific code to have
- * populated the following initialisation.
- *
- * 1) node_online_map - the map of all nodes configured (online) in the system
- * 2) node_start_pfn - the starting page frame number for a node
- * 3) node_end_pfn - the ending page fram number for a node
- */
-unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
-unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 
+#include "numa_internal.h"
 
 #ifdef CONFIG_DISCONTIGMEM
 /*
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 }
 #endif
 
-extern unsigned long find_max_low_pfn(void);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
-int __cpuinit numa_cpu_node(int cpu)
-{
-        return apic->x86_32_numa_cpu_node(cpu);
-}
-
-/*
- * FLAT - support for basic PC memory model with discontig enabled, essentially
- *      a single node with all available processors in it with a flat
- *      memory map.
- */
-int __init get_memcfg_numa_flat(void)
-{
-        printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
-
-        node_start_pfn[0] = 0;
-        node_end_pfn[0] = max_pfn;
-        memblock_x86_register_active_regions(0, 0, max_pfn);
-        memory_present(0, 0, max_pfn);
-        node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
-
-        /* Indicate there is one node available. */
-        nodes_clear(node_online_map);
-        node_set_online(0);
-        return 1;
-}
-
-/*
- * Find the highest page frame number we have available for the node
- */
-static void __init propagate_e820_map_node(int nid)
-{
-        if (node_end_pfn[nid] > max_pfn)
-                node_end_pfn[nid] = max_pfn;
-        /*
-         * if a user has given mem=XXXX, then we need to make sure
-         * that the node _starts_ before that, too, not just ends
-         */
-        if (node_start_pfn[nid] > max_pfn)
-                node_start_pfn[nid] = max_pfn;
-        BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
-}
-
-/*
- * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
- * method.  For node zero take this from the bottom of memory, for
- * subsequent nodes place them at node_remap_start_vaddr which contains
- * node local data in physically node local memory.  See setup_memory()
- * for details.
- */
-static void __init allocate_pgdat(int nid)
-{
-        char buf[16];
-
-        if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
-                NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
-        else {
-                unsigned long pgdat_phys;
-                pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
-                                max_pfn_mapped<<PAGE_SHIFT,
-                                sizeof(pg_data_t),
-                                PAGE_SIZE);
-                NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-                memset(buf, 0, sizeof(buf));
-                sprintf(buf, "NODE_DATA %d", nid);
-                memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
-        }
-        printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
-                nid, (unsigned long)NODE_DATA(nid));
-}
-
 /*
- * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
- * virtual address space (KVA) is reserved and portions of nodes are mapped
- * using it. This is to allow node-local memory to be allocated for
- * structures that would normally require ZONE_NORMAL. The memory is
- * allocated with alloc_remap() and callers should be prepared to allocate
- * from the bootmem allocator instead.
+ * Remap memory allocator
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 
+/**
+ * alloc_remap - Allocate remapped memory
+ * @nid: NUMA node to allocate memory from
+ * @size: The size of allocation
+ *
+ * Allocate @size bytes from the remap area of NUMA node @nid.  The
+ * size of the remap area is predetermined by init_alloc_remap() and
+ * only the callers considered there should call this function.  For
+ * more info, please read the comment on top of init_alloc_remap().
+ *
+ * The caller must be ready to handle allocation failure from this
+ * function and fall back to regular memory allocator in such cases.
+ *
+ * CONTEXT:
+ * Single CPU early boot context.
+ *
+ * RETURNS:
+ * Pointer to the allocated memory on success, %NULL on failure.
+ */
 void *alloc_remap(int nid, unsigned long size)
 {
         void *allocation = node_remap_alloc_vaddr[nid];
 
         size = ALIGN(size, L1_CACHE_BYTES);
 
-        if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+        if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
                 return NULL;
 
         node_remap_alloc_vaddr[nid] += size;
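
The kernel-doc added in the hunk above stresses that alloc_remap() is opportunistic: callers must be ready for a NULL return and fall back to a regular early allocator. A minimal sketch of that caller-side pattern follows; the helper name and the bootmem fallback are illustrative assumptions, not code from this patch.

/* Illustrative caller pattern only -- not part of this patch. */
static void * __init example_alloc_node_struct(int nid, unsigned long size)
{
        /* Try the node-local remap window set up by init_alloc_remap(). */
        void *p = alloc_remap(nid, size);

        if (p)
                return p;

        /* Remap area absent or exhausted: fall back to bootmem. */
        return alloc_bootmem_node(NODE_DATA(nid), size);
}

Note also the boundary check relaxed in the same hunk: with ">" instead of ">=", an allocation that ends exactly at node_remap_end_vaddr[] is now accepted, so the precomputed window can be used in full.
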
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-static void __init remap_numa_kva(void)
-{
-        void *vaddr;
-        unsigned long pfn;
-        int node;
-
-        for_each_online_node(node) {
-                printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
-                for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
-                        vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
-                        printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
-                                (unsigned long)vaddr,
-                                node_remap_start_pfn[node] + pfn);
-                        set_pmd_pfn((ulong) vaddr,
-                                node_remap_start_pfn[node] + pfn,
-                                PAGE_KERNEL_LARGE);
-                }
-        }
-}
-
 #ifdef CONFIG_HIBERNATION
 /**
  * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 	int node;
 
 	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, size, pfn;
+		unsigned long start_va, start_pfn, nr_pages, pfn;
 
 		start_va = (unsigned long)node_remap_start_vaddr[node];
 		start_pfn = node_remap_start_pfn[node];
-		size = node_remap_size[node];
+		nr_pages = (node_remap_end_vaddr[node] -
+			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
 
 		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
-		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
 			pgd_t *pgd = pgd_base + pgd_index(vaddr);
 			pud_t *pud = pud_offset(pgd, vaddr);
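
With node_remap_size[] gone, the resume path above recomputes the number of remapped pages from the allocator's own start and end virtual addresses. A small worked example of that arithmetic, using assumed numbers (a 4 MiB window and 4 KiB pages) rather than anything taken from the patch:

/* Example numbers only: a 4 MiB remap window and 4 KiB pages. */
static unsigned long example_nr_remap_pages(void)
{
        unsigned long start_va = 0xf7800000UL;
        unsigned long end_va   = start_va + (4UL << 20);    /* 4 MiB window */

        /*
         * (end_va - start_va) >> 12 == 1024 pages; with PTRS_PER_PTE
         * == 1024 (non-PAE), the resume loop above covers the whole
         * window in a single large-page step.
         */
        return (end_va - start_va) >> 12;
}
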
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long calculate_numa_remap_pages(void)
+/**
+ * init_alloc_remap - Initialize remap allocator for a NUMA node
+ * @nid: NUMA node to initizlie remap allocator for
+ *
+ * NUMA nodes may end up without any lowmem.  As allocating pgdat and
+ * memmap on a different node with lowmem is inefficient, a special
+ * remap allocator is implemented which can be used by alloc_remap().
+ *
+ * For each node, the amount of memory which will be necessary for
+ * pgdat and memmap is calculated and two memory areas of the size are
+ * allocated - one in the node and the other in lowmem; then, the area
+ * in the node is remapped to the lowmem area.
+ *
+ * As pgdat and memmap must be allocated in lowmem anyway, this
+ * doesn't waste lowmem address space; however, the actual lowmem
+ * which gets remapped over is wasted.  The amount shouldn't be
+ * problematic on machines this feature will be used.
+ *
+ * Initialization failure isn't fatal.  alloc_remap() is used
+ * opportunistically and the callers will fall back to other memory
+ * allocation mechanisms on failure.
+ */
+void __init init_alloc_remap(int nid, u64 start, u64 end)
 {
-        int nid;
-        unsigned long size, reserve_pages = 0;
-
-        for_each_online_node(nid) {
-                u64 node_kva_target;
-                u64 node_kva_final;
-
-                /*
-                 * The acpi/srat node info can show hot-add memroy zones
-                 * where memory could be added but not currently present.
-                 */
-                printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-                        nid, node_start_pfn[nid], node_end_pfn[nid]);
-                if (node_start_pfn[nid] > max_pfn)
-                        continue;
-                if (!node_end_pfn[nid])
-                        continue;
-                if (node_end_pfn[nid] > max_pfn)
-                        node_end_pfn[nid] = max_pfn;
-
-                /* ensure the remap includes space for the pgdat. */
-                size = node_remap_size[nid] + sizeof(pg_data_t);
-
-                /* convert size to large (pmd size) pages, rounding up */
-                size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-                /* now the roundup is correct, convert to PAGE_SIZE pages */
-                size = size * PTRS_PER_PTE;
-
-                node_kva_target = round_down(node_end_pfn[nid] - size,
-                        PTRS_PER_PTE);
-                node_kva_target <<= PAGE_SHIFT;
-                do {
-                        node_kva_final = memblock_find_in_range(node_kva_target,
-                                        ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-                                        ((u64)size)<<PAGE_SHIFT,
-                                        LARGE_PAGE_BYTES);
-                        node_kva_target -= LARGE_PAGE_BYTES;
-                } while (node_kva_final == MEMBLOCK_ERROR &&
-                         (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
-
-                if (node_kva_final == MEMBLOCK_ERROR)
-                        panic("Can not get kva ram\n");
-
-                node_remap_size[nid] = size;
-                node_remap_offset[nid] = reserve_pages;
-                reserve_pages += size;
-                printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
-                                  " node %d at %llx\n",
-                                size, nid, node_kva_final>>PAGE_SHIFT);
-
-                /*
-                 *  prevent kva address below max_low_pfn want it on system
-                 *  with less memory later.
-                 *  layout will be: KVA address , KVA RAM
-                 *
-                 *  we are supposed to only record the one less then max_low_pfn
-                 *  but we could have some hole in high memory, and it will only
-                 *  check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
-                 *  to use it as free.
-                 *  So memblock_x86_reserve_range here, hope we don't run out of that array
-                 */
-                memblock_x86_reserve_range(node_kva_final,
-                              node_kva_final+(((u64)size)<<PAGE_SHIFT),
-                              "KVA RAM");
-
-                node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
-        }
-        printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-                        reserve_pages);
-        return reserve_pages;
-}
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long end_pfn = end >> PAGE_SHIFT;
+        unsigned long size, pfn;
+        u64 node_pa, remap_pa;
+        void *remap_va;
 
-static void init_remap_allocator(int nid)
-{
-        node_remap_start_vaddr[nid] = pfn_to_kaddr(
-                        kva_start_pfn + node_remap_offset[nid]);
-        node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-                (node_remap_size[nid] * PAGE_SIZE);
-        node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-                ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-        printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
-                (ulong) node_remap_start_vaddr[nid],
-                (ulong) node_remap_end_vaddr[nid]);
+        /*
+         * The acpi/srat node info can show hot-add memroy zones where
+         * memory could be added but not currently present.
+         */
+        printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
+               nid, start_pfn, end_pfn);
+
+        /* calculate the necessary space aligned to large page size */
+        size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
+        size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+        size = ALIGN(size, LARGE_PAGE_BYTES);
+
+        /* allocate node memory and the lowmem remap area */
+        node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
+        if (node_pa == MEMBLOCK_ERROR) {
+                pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
+                           size, nid);
+                return;
+        }
+        memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+        remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+                                          max_low_pfn << PAGE_SHIFT,
+                                          size, LARGE_PAGE_BYTES);
+        if (remap_pa == MEMBLOCK_ERROR) {
+                pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+                           size, nid);
+                memblock_x86_free_range(node_pa, node_pa + size);
+                return;
+        }
+        memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+        remap_va = phys_to_virt(remap_pa);
+
+        /* perform actual remap */
+        for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
+                set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
+                            (node_pa >> PAGE_SHIFT) + pfn,
+                            PAGE_KERNEL_LARGE);
+
+        /* initialize remap allocator parameters */
+        node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+        node_remap_start_vaddr[nid] = remap_va;
+        node_remap_end_vaddr[nid] = remap_va + size;
+        node_remap_alloc_vaddr[nid] = remap_va;
+
+        printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+               nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
 void __init initmem_init(void)
 {
-        int nid;
-        long kva_target_pfn;
-
-        /*
-         * When mapping a NUMA machine we allocate the node_mem_map arrays
-         * from node local memory. They are then mapped directly into KVA
-         * between zone normal and vmalloc space. Calculate the size of
-         * this space and use it to adjust the boundary between ZONE_NORMAL
-         * and ZONE_HIGHMEM.
-         */
-
-        get_memcfg_numa();
-        numa_init_array();
-
-        kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
+        x86_numa_init();
 
-        kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-        do {
-                kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
-                                        max_low_pfn<<PAGE_SHIFT,
-                                        kva_pages<<PAGE_SHIFT,
-                                        PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
-                kva_target_pfn -= PTRS_PER_PTE;
-        } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
-
-        if (kva_start_pfn == MEMBLOCK_ERROR)
-                panic("Can not get kva space\n");
-
-        printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
-                kva_start_pfn, max_low_pfn);
-        printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
-
-        /* avoid clash with initrd */
-        memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
-                      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
-                      "KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
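
The new init_alloc_remap() sizes the per-node window as memmap plus a page-aligned pg_data_t, rounded up to the large-page granularity that set_pmd_pfn() maps. A rough worked example with assumed numbers (1 GiB node, 4 KiB pages, 32-byte struct page, 4 MiB large pages) shows the effect of the rounding; none of these values come from the patch itself.

/* Worked example with assumed numbers; nothing here comes from the patch. */
#define EX_PAGE_SIZE    4096UL
#define EX_LARGE_PAGE   (1024 * EX_PAGE_SIZE)               /* 4 MiB, non-PAE */
#define EX_ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

static unsigned long example_remap_size(void)
{
        unsigned long node_pages = 262144;                   /* 1 GiB of 4 KiB pages */
        unsigned long size;

        size  = node_pages * 32;                             /* memmap at 32 B/page = 8 MiB */
        size += EX_ALIGN(20480, EX_PAGE_SIZE);               /* pg_data_t, assumed ~20 KiB */
        return EX_ALIGN(size, EX_LARGE_PAGE);                /* rounds up to 12 MiB */
}
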
@@ -409,51 +257,9 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-        for_each_online_node(nid) {
-                init_remap_allocator(nid);
-
-                allocate_pgdat(nid);
-        }
-        remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-        for_each_online_node(nid)
-                propagate_e820_map_node(nid);
-
-        for_each_online_node(nid) {
-                memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-                NODE_DATA(nid)->node_id = nid;
-        }
 
 	setup_bootmem_allocator();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int paddr_to_nid(u64 addr)
-{
-        int nid;
-        unsigned long pfn = PFN_DOWN(addr);
-
-        for_each_node(nid)
-                if (node_start_pfn[nid] <= pfn &&
-                    pfn < node_end_pfn[nid])
-                        return nid;
-
-        return -1;
-}
-
-/*
- * This function is used to ask node id BEFORE memmap and mem_section's
- * initialization (pfn_to_nid() can't be used yet).
- * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-        int nid = paddr_to_nid(addr);
-        return (nid >= 0) ? nid : 0;
-}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
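
Taken together, init_alloc_remap() maps one per-node window up front and alloc_remap() then hands out cache-line-aligned chunks from it until the window runs out, at which point callers fall back to other allocators. The following is a self-contained userspace model of that bump-allocation behaviour with made-up sizes; it mirrors the logic added above but is not kernel code.

#include <stddef.h>

#define EXAMPLE_ALIGN   64UL                    /* stands in for L1_CACHE_BYTES */

static char remap_window[4096];                 /* stands in for the remapped area */
static char *alloc_ptr = remap_window;
static char *end_ptr   = remap_window + sizeof(remap_window);

/* Same shape as alloc_remap(): align, bounds-check, bump or fail with NULL. */
static void *model_alloc_remap(size_t size)
{
        void *p = alloc_ptr;

        size = (size + EXAMPLE_ALIGN - 1) & ~(EXAMPLE_ALIGN - 1);
        if (!alloc_ptr || alloc_ptr + size > end_ptr)
                return NULL;                    /* caller falls back elsewhere */

        alloc_ptr += size;
        return p;
}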