aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Rientjes <rientjes@google.com> 2010-02-15 16:43:33 -0500
committer H. Peter Anvin <hpa@zytor.com> 2010-02-15 17:34:18 -0500
commit ca2107c9d6cf44fb915402d6f12b9d9ff3925cd7 (patch)
tree 4d6800724cb104ecec76f145e31202688544af5f
parent 8df5bb34defd685fe86f60746bbf3d47d1c6f033 (diff)
x86, numa: Remove configurable node size support for numa emulation
Now that numa=fake=<size>[MG] is implemented, it is possible to remove configurable node size support. The command-line parsing was already broken (numa=fake=*128, for example, would not work) and since fake nodes are now interleaved over physical nodes, this support is no longer required.

Signed-off-by: David Rientjes <rientjes@google.com>
LKML-Reference: <alpine.DEB.2.00.1002151343080.26927@chino.kir.corp.google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r-- Documentation/x86/x86_64/boot-options.txt 16
-rw-r--r-- arch/x86/mm/numa_64.c 160
2 files changed, 16 insertions, 160 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 01150c64aa73..7fbbaf85f5b7 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -170,19 +170,9 @@ NUMA
170 If given as a memory unit, fills all system RAM with nodes of 170 If given as a memory unit, fills all system RAM with nodes of
171 size interleaved over physical nodes. 171 size interleaved over physical nodes.
172 172
173 numa=fake=CMDLINE 173 numa=fake=<N>
174 If a number, fakes CMDLINE nodes and ignores NUMA setup of the 174 If given as an integer, fills all system RAM with N fake nodes
175 actual machine. Otherwise, system memory is configured 175 interleaved over physical nodes.
176 depending on the sizes and coefficients listed. For example:
177 numa=fake=2*512,1024,4*256,*128
178 gives two 512M nodes, a 1024M node, four 256M nodes, and the
179 rest split into 128M chunks. If the last character of CMDLINE
180 is a *, the remaining memory is divided up equally among its
181 coefficient:
182 numa=fake=2*512,2*
183 gives two 512M nodes and the rest split into two nodes.
184 Otherwise, the remaining system RAM is allocated to an
185 additional node.
186 176
187ACPI 177ACPI
188 178
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c47c78ba3aca..3307ea8bd43a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -598,172 +598,38 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
598} 598}
599 599
600/* 600/*
601 * Splits num_nodes nodes up equally starting at node_start. The return value
602 * is the number of nodes split up and addr is adjusted to be at the end of the
603 * last node allocated.
604 */
605static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,
606 int num_nodes)
607{
608 unsigned int big;
609 u64 size;
610 int i;
611
612 if (num_nodes <= 0)
613 return -1;
614 if (num_nodes > MAX_NUMNODES)
615 num_nodes = MAX_NUMNODES;
616 size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
617 num_nodes;
618 /*
619 * Calculate the number of big nodes that can be allocated as a result
620 * of consolidating the leftovers.
621 */
622 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
623 FAKE_NODE_MIN_SIZE;
624
625 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
626 size &= FAKE_NODE_MIN_HASH_MASK;
627 if (!size) {
628 printk(KERN_ERR "Not enough memory for each node. "
629 "NUMA emulation disabled.\n");
630 return -1;
631 }
632
633 for (i = node_start; i < num_nodes + node_start; i++) {
634 u64 end = *addr + size;
635
636 if (i < big)
637 end += FAKE_NODE_MIN_SIZE;
638 /*
639 * The final node can have the remaining system RAM. Other
640 * nodes receive roughly the same amount of available pages.
641 */
642 if (i == num_nodes + node_start - 1)
643 end = max_addr;
644 else
645 end = find_end_of_node(*addr, max_addr, size);
646 if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
647 break;
648 }
649 return i - node_start + 1;
650}
651
652/*
653 * Splits the remaining system RAM into chunks of size. The remaining memory is
654 * always assigned to a final node and can be asymmetric. Returns the number of
655 * nodes split.
656 */
657static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
658 u64 size)
659{
660 int i = node_start;
661 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
662 while (!setup_node_range(i++, addr, size, max_addr))
663 ;
664 return i - node_start;
665}
666
667/*
668 * Sets up the system RAM area from start_pfn to last_pfn according to the 601 * Sets up the system RAM area from start_pfn to last_pfn according to the
669 * numa=fake command-line option. 602 * numa=fake command-line option.
670 */ 603 */
671static int __init numa_emulation(unsigned long start_pfn, 604static int __init numa_emulation(unsigned long start_pfn,
672 unsigned long last_pfn, int acpi, int k8) 605 unsigned long last_pfn, int acpi, int k8)
673{ 606{
674 u64 size, addr = start_pfn << PAGE_SHIFT; 607 u64 addr = start_pfn << PAGE_SHIFT;
675 u64 max_addr = last_pfn << PAGE_SHIFT; 608 u64 max_addr = last_pfn << PAGE_SHIFT;
676 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
677 int num_phys_nodes; 609 int num_phys_nodes;
610 int num_nodes;
611 int i;
678 612
679 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); 613 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
680 /* 614 /*
681 * If the numa=fake command-line contains a 'M' or 'G', it represents 615 * If the numa=fake command-line contains a 'M' or 'G', it represents
682 * the fixed node size. 616 * the fixed node size. Otherwise, if it is just a single number N,
617 * split the system RAM into N fake nodes.
683 */ 618 */
684 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { 619 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
620 u64 size;
621
685 size = memparse(cmdline, &cmdline); 622 size = memparse(cmdline, &cmdline);
686 num_nodes = split_nodes_size_interleave(addr, max_addr, size); 623 num_nodes = split_nodes_size_interleave(addr, max_addr, size);
687 if (num_nodes < 0) 624 } else {
688 return num_nodes; 625 unsigned long n;
689 goto out;
690 }
691 626
692 /* 627 n = simple_strtoul(cmdline, NULL, 0);
693 * If the numa=fake command-line is just a single number N, split the 628 num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
694 * system RAM into N fake nodes.
695 */
696 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
697 long n = simple_strtol(cmdline, NULL, 0);
698
699 num_nodes = split_nodes_interleave(addr, max_addr,
700 num_phys_nodes, n);
701 if (num_nodes < 0)
702 return num_nodes;
703 goto out;
704 } 629 }
705 630
706 /* Parse the command line. */ 631 if (num_nodes < 0)
707 for (coeff_flag = 0; ; cmdline++) { 632 return num_nodes;
708 if (*cmdline && isdigit(*cmdline)) {
709 num = num * 10 + *cmdline - '0';
710 continue;
711 }
712 if (*cmdline == '*') {
713 if (num > 0)
714 coeff = num;
715 coeff_flag = 1;
716 }
717 if (!*cmdline || *cmdline == ',') {
718 if (!coeff_flag)
719 coeff = 1;
720 /*
721 * Round down to the nearest FAKE_NODE_MIN_SIZE.
722 * Command-line coefficients are in megabytes.
723 */
724 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
725 if (size)
726 for (i = 0; i < coeff; i++, num_nodes++)
727 if (setup_node_range(num_nodes, &addr,
728 size, max_addr) < 0)
729 goto done;
730 if (!*cmdline)
731 break;
732 coeff_flag = 0;
733 coeff = -1;
734 }
735 num = 0;
736 }
737done:
738 if (!num_nodes)
739 return -1;
740 /* Fill remainder of system RAM, if appropriate. */
741 if (addr < max_addr) {
742 if (coeff_flag && coeff < 0) {
743 /* Split remaining nodes into num-sized chunks */
744 num_nodes += split_nodes_by_size(&addr, max_addr,
745 num_nodes, num);
746 goto out;
747 }
748 switch (*(cmdline - 1)) {
749 case '*':
750 /* Split remaining nodes into coeff chunks */
751 if (coeff <= 0)
752 break;
753 num_nodes += split_nodes_equally(&addr, max_addr,
754 num_nodes, coeff);
755 break;
756 case ',':
757 /* Do not allocate remaining system RAM */
758 break;
759 default:
760 /* Give one final node */
761 setup_node_range(num_nodes, &addr, max_addr - addr,
762 max_addr);
763 num_nodes++;
764 }
765 }
766out:
767 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 633 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
768 if (memnode_shift < 0) { 634 if (memnode_shift < 0) {
769 memnode_shift = 0; 635 memnode_shift = 0;