diff options
author | David Rientjes <rientjes@google.com> | 2010-02-15 16:43:33 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-15 17:34:18 -0500 |
commit | ca2107c9d6cf44fb915402d6f12b9d9ff3925cd7 (patch) | |
tree | 4d6800724cb104ecec76f145e31202688544af5f | |
parent | 8df5bb34defd685fe86f60746bbf3d47d1c6f033 (diff) |
x86, numa: Remove configurable node size support for numa emulation
Now that numa=fake=<size>[MG] is implemented, it is possible to remove
configurable node size support. The command-line parsing was already
broken (numa=fake=*128, for example, would not work) and since fake nodes
are now interleaved over physical nodes, this support is no longer
required.
Signed-off-by: David Rientjes <rientjes@google.com>
LKML-Reference: <alpine.DEB.2.00.1002151343080.26927@chino.kir.corp.google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r-- | Documentation/x86/x86_64/boot-options.txt | 16 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 160 |
2 files changed, 16 insertions, 160 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 01150c64aa73..7fbbaf85f5b7 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -170,19 +170,9 @@ NUMA | |||
170 | If given as a memory unit, fills all system RAM with nodes of | 170 | If given as a memory unit, fills all system RAM with nodes of |
171 | size interleaved over physical nodes. | 171 | size interleaved over physical nodes. |
172 | 172 | ||
173 | numa=fake=CMDLINE | 173 | numa=fake=<N> |
174 | If a number, fakes CMDLINE nodes and ignores NUMA setup of the | 174 | If given as an integer, fills all system RAM with N fake nodes |
175 | actual machine. Otherwise, system memory is configured | 175 | interleaved over physical nodes. |
176 | depending on the sizes and coefficients listed. For example: | ||
177 | numa=fake=2*512,1024,4*256,*128 | ||
178 | gives two 512M nodes, a 1024M node, four 256M nodes, and the | ||
179 | rest split into 128M chunks. If the last character of CMDLINE | ||
180 | is a *, the remaining memory is divided up equally among its | ||
181 | coefficient: | ||
182 | numa=fake=2*512,2* | ||
183 | gives two 512M nodes and the rest split into two nodes. | ||
184 | Otherwise, the remaining system RAM is allocated to an | ||
185 | additional node. | ||
186 | 176 | ||
187 | ACPI | 177 | ACPI |
188 | 178 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c47c78ba3aca..3307ea8bd43a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -598,172 +598,38 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
598 | } | 598 | } |
599 | 599 | ||
600 | /* | 600 | /* |
601 | * Splits num_nodes nodes up equally starting at node_start. The return value | ||
602 | * is the number of nodes split up and addr is adjusted to be at the end of the | ||
603 | * last node allocated. | ||
604 | */ | ||
605 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, | ||
606 | int num_nodes) | ||
607 | { | ||
608 | unsigned int big; | ||
609 | u64 size; | ||
610 | int i; | ||
611 | |||
612 | if (num_nodes <= 0) | ||
613 | return -1; | ||
614 | if (num_nodes > MAX_NUMNODES) | ||
615 | num_nodes = MAX_NUMNODES; | ||
616 | size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / | ||
617 | num_nodes; | ||
618 | /* | ||
619 | * Calculate the number of big nodes that can be allocated as a result | ||
620 | * of consolidating the leftovers. | ||
621 | */ | ||
622 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / | ||
623 | FAKE_NODE_MIN_SIZE; | ||
624 | |||
625 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ | ||
626 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
627 | if (!size) { | ||
628 | printk(KERN_ERR "Not enough memory for each node. " | ||
629 | "NUMA emulation disabled.\n"); | ||
630 | return -1; | ||
631 | } | ||
632 | |||
633 | for (i = node_start; i < num_nodes + node_start; i++) { | ||
634 | u64 end = *addr + size; | ||
635 | |||
636 | if (i < big) | ||
637 | end += FAKE_NODE_MIN_SIZE; | ||
638 | /* | ||
639 | * The final node can have the remaining system RAM. Other | ||
640 | * nodes receive roughly the same amount of available pages. | ||
641 | */ | ||
642 | if (i == num_nodes + node_start - 1) | ||
643 | end = max_addr; | ||
644 | else | ||
645 | end = find_end_of_node(*addr, max_addr, size); | ||
646 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) | ||
647 | break; | ||
648 | } | ||
649 | return i - node_start + 1; | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Splits the remaining system RAM into chunks of size. The remaining memory is | ||
654 | * always assigned to a final node and can be asymmetric. Returns the number of | ||
655 | * nodes split. | ||
656 | */ | ||
657 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, | ||
658 | u64 size) | ||
659 | { | ||
660 | int i = node_start; | ||
661 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
662 | while (!setup_node_range(i++, addr, size, max_addr)) | ||
663 | ; | ||
664 | return i - node_start; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 601 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
669 | * numa=fake command-line option. | 602 | * numa=fake command-line option. |
670 | */ | 603 | */ |
671 | static int __init numa_emulation(unsigned long start_pfn, | 604 | static int __init numa_emulation(unsigned long start_pfn, |
672 | unsigned long last_pfn, int acpi, int k8) | 605 | unsigned long last_pfn, int acpi, int k8) |
673 | { | 606 | { |
674 | u64 size, addr = start_pfn << PAGE_SHIFT; | 607 | u64 addr = start_pfn << PAGE_SHIFT; |
675 | u64 max_addr = last_pfn << PAGE_SHIFT; | 608 | u64 max_addr = last_pfn << PAGE_SHIFT; |
676 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | ||
677 | int num_phys_nodes; | 609 | int num_phys_nodes; |
610 | int num_nodes; | ||
611 | int i; | ||
678 | 612 | ||
679 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); | 613 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
680 | /* | 614 | /* |
681 | * If the numa=fake command-line contains a 'M' or 'G', it represents | 615 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
682 | * the fixed node size. | 616 | * the fixed node size. Otherwise, if it is just a single number N, |
617 | * split the system RAM into N fake nodes. | ||
683 | */ | 618 | */ |
684 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { | 619 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { |
620 | u64 size; | ||
621 | |||
685 | size = memparse(cmdline, &cmdline); | 622 | size = memparse(cmdline, &cmdline); |
686 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); | 623 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); |
687 | if (num_nodes < 0) | 624 | } else { |
688 | return num_nodes; | 625 | unsigned long n; |
689 | goto out; | ||
690 | } | ||
691 | 626 | ||
692 | /* | 627 | n = simple_strtoul(cmdline, NULL, 0); |
693 | * If the numa=fake command-line is just a single number N, split the | 628 | num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); |
694 | * system RAM into N fake nodes. | ||
695 | */ | ||
696 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | ||
697 | long n = simple_strtol(cmdline, NULL, 0); | ||
698 | |||
699 | num_nodes = split_nodes_interleave(addr, max_addr, | ||
700 | num_phys_nodes, n); | ||
701 | if (num_nodes < 0) | ||
702 | return num_nodes; | ||
703 | goto out; | ||
704 | } | 629 | } |
705 | 630 | ||
706 | /* Parse the command line. */ | 631 | if (num_nodes < 0) |
707 | for (coeff_flag = 0; ; cmdline++) { | 632 | return num_nodes; |
708 | if (*cmdline && isdigit(*cmdline)) { | ||
709 | num = num * 10 + *cmdline - '0'; | ||
710 | continue; | ||
711 | } | ||
712 | if (*cmdline == '*') { | ||
713 | if (num > 0) | ||
714 | coeff = num; | ||
715 | coeff_flag = 1; | ||
716 | } | ||
717 | if (!*cmdline || *cmdline == ',') { | ||
718 | if (!coeff_flag) | ||
719 | coeff = 1; | ||
720 | /* | ||
721 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
722 | * Command-line coefficients are in megabytes. | ||
723 | */ | ||
724 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
725 | if (size) | ||
726 | for (i = 0; i < coeff; i++, num_nodes++) | ||
727 | if (setup_node_range(num_nodes, &addr, | ||
728 | size, max_addr) < 0) | ||
729 | goto done; | ||
730 | if (!*cmdline) | ||
731 | break; | ||
732 | coeff_flag = 0; | ||
733 | coeff = -1; | ||
734 | } | ||
735 | num = 0; | ||
736 | } | ||
737 | done: | ||
738 | if (!num_nodes) | ||
739 | return -1; | ||
740 | /* Fill remainder of system RAM, if appropriate. */ | ||
741 | if (addr < max_addr) { | ||
742 | if (coeff_flag && coeff < 0) { | ||
743 | /* Split remaining nodes into num-sized chunks */ | ||
744 | num_nodes += split_nodes_by_size(&addr, max_addr, | ||
745 | num_nodes, num); | ||
746 | goto out; | ||
747 | } | ||
748 | switch (*(cmdline - 1)) { | ||
749 | case '*': | ||
750 | /* Split remaining nodes into coeff chunks */ | ||
751 | if (coeff <= 0) | ||
752 | break; | ||
753 | num_nodes += split_nodes_equally(&addr, max_addr, | ||
754 | num_nodes, coeff); | ||
755 | break; | ||
756 | case ',': | ||
757 | /* Do not allocate remaining system RAM */ | ||
758 | break; | ||
759 | default: | ||
760 | /* Give one final node */ | ||
761 | setup_node_range(num_nodes, &addr, max_addr - addr, | ||
762 | max_addr); | ||
763 | num_nodes++; | ||
764 | } | ||
765 | } | ||
766 | out: | ||
767 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | 633 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); |
768 | if (memnode_shift < 0) { | 634 | if (memnode_shift < 0) { |
769 | memnode_shift = 0; | 635 | memnode_shift = 0; |