diff options
author | David Rientjes <rientjes@google.com> | 2007-05-02 13:27:09 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-05-02 13:27:09 -0400 |
commit | 382591d500bbcd20a44416c5e0e292708468587c (patch) | |
tree | deb996c4dca98cf36c432ddb710719f7d0202252 | |
parent | 14694d736bb66d0ec250d05c81c6e98a19c229c6 (diff) |
[PATCH] x86-64: fixed size remaining fake nodes
Extends the numa=fake x86_64 command-line option to split the remaining system
memory into nodes of fixed size. Any leftover memory is allocated to a final
node unless the command-line ends with a comma.
For example:
    numa=fake=2*512,*128
gives two 512M nodes and splits the remaining system memory into nodes of
128M each.
This is beneficial for systems where the exact size of RAM is unknown or not
necessarily relevant, but the size of the remaining nodes to be allocated is
known based on their capacity for resource management.
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | Documentation/x86_64/boot-options.txt | 14 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 47 |
2 files changed, 46 insertions, 15 deletions
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 12a9aacecaae..6177d881983f 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt | |||
@@ -153,11 +153,15 @@ NUMA | |||
153 | If a number, fakes CMDLINE nodes and ignores NUMA setup of the | 153 | If a number, fakes CMDLINE nodes and ignores NUMA setup of the |
154 | actual machine. Otherwise, system memory is configured | 154 | actual machine. Otherwise, system memory is configured |
155 | depending on the sizes and coefficients listed. For example: | 155 | depending on the sizes and coefficients listed. For example: |
156 | numa=fake=2*512,1024,4*256 | 156 | numa=fake=2*512,1024,4*256,*128 |
157 | gives two 512M nodes, a 1024M node, and four 256M nodes. The | 157 | gives two 512M nodes, a 1024M node, four 256M nodes, and the |
158 | remaining system RAM is allocated to an additional node. If | 158 | rest split into 128M chunks. If the last character of CMDLINE |
159 | the last character of CMDLINE is a *, the remaining system RAM | 159 | is a *, the remaining memory is divided up equally among its |
160 | is instead divided up equally among its coefficient. | 160 | coefficient: |
161 | numa=fake=2*512,2* | ||
162 | gives two 512M nodes and the rest split into two nodes. | ||
163 | Otherwise, the remaining system RAM is allocated to an | ||
164 | additional node. | ||
161 | 165 | ||
162 | numa=hotadd=percent | 166 | numa=hotadd=percent |
163 | Only allow hotadd memory to preallocate page structures upto | 167 | Only allow hotadd memory to preallocate page structures upto |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 0ae2d9d5d7ea..5ee07bc41eb5 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -362,6 +362,21 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * Splits the remaining system RAM into chunks of size. The remaining memory is | ||
366 | * always assigned to a final node and can be asymmetric. Returns the number of | ||
367 | * nodes split. | ||
368 | */ | ||
369 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | ||
370 | u64 max_addr, int node_start, u64 size) | ||
371 | { | ||
372 | int i = node_start; | ||
373 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
374 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | ||
375 | ; | ||
376 | return i - node_start; | ||
377 | } | ||
378 | |||
379 | /* | ||
365 | * Sets up the system RAM area from start_pfn to end_pfn according to the | 380 | * Sets up the system RAM area from start_pfn to end_pfn according to the |
366 | * numa=fake command-line option. | 381 | * numa=fake command-line option. |
367 | */ | 382 | */ |
@@ -370,9 +385,10 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
370 | struct bootnode nodes[MAX_NUMNODES]; | 385 | struct bootnode nodes[MAX_NUMNODES]; |
371 | u64 addr = start_pfn << PAGE_SHIFT; | 386 | u64 addr = start_pfn << PAGE_SHIFT; |
372 | u64 max_addr = end_pfn << PAGE_SHIFT; | 387 | u64 max_addr = end_pfn << PAGE_SHIFT; |
373 | unsigned int coeff; | ||
374 | unsigned int num = 0; | ||
375 | int num_nodes = 0; | 388 | int num_nodes = 0; |
389 | int coeff_flag; | ||
390 | int coeff = -1; | ||
391 | int num = 0; | ||
376 | u64 size; | 392 | u64 size; |
377 | int i; | 393 | int i; |
378 | 394 | ||
@@ -390,29 +406,34 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
390 | } | 406 | } |
391 | 407 | ||
392 | /* Parse the command line. */ | 408 | /* Parse the command line. */ |
393 | for (coeff = 1; ; cmdline++) { | 409 | for (coeff_flag = 0; ; cmdline++) { |
394 | if (*cmdline && isdigit(*cmdline)) { | 410 | if (*cmdline && isdigit(*cmdline)) { |
395 | num = num * 10 + *cmdline - '0'; | 411 | num = num * 10 + *cmdline - '0'; |
396 | continue; | 412 | continue; |
397 | } | 413 | } |
398 | if (*cmdline == '*') | 414 | if (*cmdline == '*') { |
399 | coeff = num; | 415 | if (num > 0) |
416 | coeff = num; | ||
417 | coeff_flag = 1; | ||
418 | } | ||
400 | if (!*cmdline || *cmdline == ',') { | 419 | if (!*cmdline || *cmdline == ',') { |
420 | if (!coeff_flag) | ||
421 | coeff = 1; | ||
401 | /* | 422 | /* |
402 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | 423 | * Round down to the nearest FAKE_NODE_MIN_SIZE. |
403 | * Command-line coefficients are in megabytes. | 424 | * Command-line coefficients are in megabytes. |
404 | */ | 425 | */ |
405 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 426 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; |
406 | if (size) { | 427 | if (size) |
407 | for (i = 0; i < coeff; i++, num_nodes++) | 428 | for (i = 0; i < coeff; i++, num_nodes++) |
408 | if (setup_node_range(num_nodes, nodes, | 429 | if (setup_node_range(num_nodes, nodes, |
409 | &addr, size, max_addr) < 0) | 430 | &addr, size, max_addr) < 0) |
410 | goto done; | 431 | goto done; |
411 | coeff = 1; | 432 | if (!*cmdline) |
412 | } | 433 | break; |
434 | coeff_flag = 0; | ||
435 | coeff = -1; | ||
413 | } | 436 | } |
414 | if (!*cmdline) | ||
415 | break; | ||
416 | num = 0; | 437 | num = 0; |
417 | } | 438 | } |
418 | done: | 439 | done: |
@@ -420,6 +441,12 @@ done: | |||
420 | return -1; | 441 | return -1; |
421 | /* Fill remainder of system RAM, if appropriate. */ | 442 | /* Fill remainder of system RAM, if appropriate. */ |
422 | if (addr < max_addr) { | 443 | if (addr < max_addr) { |
444 | if (coeff_flag && coeff < 0) { | ||
445 | /* Split remaining nodes into num-sized chunks */ | ||
446 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | ||
447 | num_nodes, num); | ||
448 | goto out; | ||
449 | } | ||
423 | switch (*(cmdline - 1)) { | 450 | switch (*(cmdline - 1)) { |
424 | case '*': | 451 | case '*': |
425 | /* Split remaining nodes into coeff chunks */ | 452 | /* Split remaining nodes into coeff chunks */ |