aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/mm')
-rw-r--r--arch/x86_64/mm/numa.c110
1 files changed, 96 insertions, 14 deletions
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 1ec16ea97519..d3f747dd61d3 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -272,31 +272,113 @@ void __init numa_init_array(void)
272} 272}
273 273
274#ifdef CONFIG_NUMA_EMU 274#ifdef CONFIG_NUMA_EMU
275/* Numa emulation */
275int numa_fake __initdata = 0; 276int numa_fake __initdata = 0;
276 277
277/* Numa emulation */ 278/*
279 * This function is used to find out if the start and end correspond to
280 * different zones.
281 */
282int zone_cross_over(unsigned long start, unsigned long end)
283{
284 if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) &&
285 (end >= (MAX_DMA32_PFN << PAGE_SHIFT)))
286 return 1;
287 return 0;
288}
289
278static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 290static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
279{ 291{
280 int i; 292 int i, big;
281 struct bootnode nodes[MAX_NUMNODES]; 293 struct bootnode nodes[MAX_NUMNODES];
282 unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; 294 unsigned long sz, old_sz;
295 unsigned long hole_size;
296 unsigned long start, end;
297 unsigned long max_addr = (end_pfn << PAGE_SHIFT);
298
299 start = (start_pfn << PAGE_SHIFT);
300 hole_size = e820_hole_size(start, max_addr);
301 sz = (max_addr - start - hole_size) / numa_fake;
283 302
284 /* Kludge needed for the hash function */ 303 /* Kludge needed for the hash function */
285 if (hweight64(sz) > 1) {
286 unsigned long x = 1;
287 while ((x << 1) < sz)
288 x <<= 1;
289 if (x < sz/2)
290 printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
291 sz = x;
292 }
293 304
305 old_sz = sz;
306 /*
307 * Round down to the nearest FAKE_NODE_MIN_SIZE.
308 */
309 sz &= FAKE_NODE_MIN_HASH_MASK;
310
311 /*
312 * We ensure that each node is at least 64MB big. Smaller than this
313 * size can cause VM hiccups.
314 */
315 if (sz == 0) {
316 printk(KERN_INFO "Not enough memory for %d nodes. Reducing "
317 "the number of nodes\n", numa_fake);
318 numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE;
319 printk(KERN_INFO "Number of fake nodes will be = %d\n",
320 numa_fake);
321 sz = FAKE_NODE_MIN_SIZE;
322 }
323 /*
324 * Find out how many nodes can get an extra NODE_MIN_SIZE granule.
325 * This logic ensures the extra memory gets distributed among as many
326 * nodes as possible (as compared to one single node getting all that
327 * extra memory.
328 */
329 big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE;
330 printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: "
331 "%d\n",
332 (sz >> 20), (hole_size >> 20), big);
294 memset(&nodes,0,sizeof(nodes)); 333 memset(&nodes,0,sizeof(nodes));
334 end = start;
295 for (i = 0; i < numa_fake; i++) { 335 for (i = 0; i < numa_fake; i++) {
296 nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; 336 /*
337 * In case we are not able to allocate enough memory for all
338 * the nodes, we reduce the number of fake nodes.
339 */
340 if (end >= max_addr) {
341 numa_fake = i - 1;
342 break;
343 }
344 start = nodes[i].start = end;
345 /*
346 * Final node can have all the remaining memory.
347 */
297 if (i == numa_fake-1) 348 if (i == numa_fake-1)
298 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; 349 sz = max_addr - start;
299 nodes[i].end = nodes[i].start + sz; 350 end = nodes[i].start + sz;
351 /*
352 * Fir "big" number of nodes get extra granule.
353 */
354 if (i < big)
355 end += FAKE_NODE_MIN_SIZE;
356 /*
357 * Iterate over the range to ensure that this node gets at
358 * least sz amount of RAM (excluding holes)
359 */
360 while ((end - start - e820_hole_size(start, end)) < sz) {
361 end += FAKE_NODE_MIN_SIZE;
362 if (end >= max_addr)
363 break;
364 }
365 /*
366 * Look at the next node to make sure there is some real memory
367 * to map. Bad things happen when the only memory present
368 * in a zone on a fake node is IO hole.
369 */
370 while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) {
371 if (zone_cross_over(start, end + sz)) {
372 end = (MAX_DMA32_PFN << PAGE_SHIFT);
373 break;
374 }
375 if (end >= max_addr)
376 break;
377 end += FAKE_NODE_MIN_SIZE;
378 }
379 if (end > max_addr)
380 end = max_addr;
381 nodes[i].end = end;
300 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 382 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
301 i, 383 i,
302 nodes[i].start, nodes[i].end, 384 nodes[i].start, nodes[i].end,