about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/mm/numa_64.c 173
1 files changed, 86 insertions, 87 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e9919c4d1573..9736204337b8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -541,7 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
541 541
542#ifdef CONFIG_NUMA_EMU 542#ifdef CONFIG_NUMA_EMU
543/* Numa emulation */ 543/* Numa emulation */
544static struct bootnode nodes[MAX_NUMNODES] __initdata;
545static struct bootnode physnodes[MAX_NUMNODES] __initdata; 544static struct bootnode physnodes[MAX_NUMNODES] __initdata;
546 545
547static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; 546static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
@@ -626,9 +625,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end)
626 return ret; 625 return ret;
627} 626}
628 627
629static void __init fake_physnodes(int acpi, int amd, int nr_nodes) 628static void __init fake_physnodes(int acpi, int amd,
629 const struct numa_meminfo *ei)
630{ 630{
631 int i; 631 static struct bootnode nodes[MAX_NUMNODES] __initdata;
632 int i, nr_nodes = 0;
633
634 for (i = 0; i < ei->nr_blks; i++) {
635 int nid = ei->blk[i].nid;
636
637 if (nodes[nid].start == nodes[nid].end) {
638 nodes[nid].start = ei->blk[i].start;
639 nodes[nid].end = ei->blk[i].end;
640 nr_nodes++;
641 } else {
642 nodes[nid].start = min(ei->blk[i].start, nodes[nid].start);
643 nodes[nid].end = max(ei->blk[i].end, nodes[nid].end);
644 }
645 }
632 646
633 BUG_ON(acpi && amd); 647 BUG_ON(acpi && amd);
634#ifdef CONFIG_ACPI_NUMA 648#ifdef CONFIG_ACPI_NUMA
@@ -645,45 +659,44 @@ static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
645} 659}
646 660
647/* 661/*
648 * Setups up nid to range from addr to addr + size. If the end 662 * Sets up nid to range from @start to @end. The return value is -errno if
649 * boundary is greater than max_addr, then max_addr is used instead. 663 * something went wrong, 0 otherwise.
650 * The return value is 0 if there is additional memory left for
651 * allocation past addr and -1 otherwise. addr is adjusted to be at
652 * the end of the node.
653 */ 664 */
654static int __init setup_node_range(int nid, int physnid, 665static int __init emu_setup_memblk(struct numa_meminfo *ei,
655 u64 *addr, u64 size, u64 max_addr) 666 int nid, int physnid, u64 start, u64 end)
656{ 667{
657 int ret = 0; 668 struct numa_memblk *eb = &ei->blk[ei->nr_blks];
658 nodes[nid].start = *addr; 669
659 *addr += size; 670 if (ei->nr_blks >= NR_NODE_MEMBLKS) {
660 if (*addr >= max_addr) { 671 pr_err("NUMA: Too many emulated memblks, failing emulation\n");
661 *addr = max_addr; 672 return -EINVAL;
662 ret = -1;
663 } 673 }
664 nodes[nid].end = *addr; 674
665 node_set(nid, node_possible_map); 675 ei->nr_blks++;
676 eb->start = start;
677 eb->end = end;
678 eb->nid = nid;
666 679
667 if (emu_nid_to_phys[nid] == NUMA_NO_NODE) 680 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
668 emu_nid_to_phys[nid] = physnid; 681 emu_nid_to_phys[nid] = physnid;
669 682
670 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 683 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
671 nodes[nid].start, nodes[nid].end, 684 eb->start, eb->end, (eb->end - eb->start) >> 20);
672 (nodes[nid].end - nodes[nid].start) >> 20); 685 return 0;
673 return ret;
674} 686}
675 687
676/* 688/*
677 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr 689 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
678 * to max_addr. The return value is the number of nodes allocated. 690 * to max_addr. The return value is the number of nodes allocated.
679 */ 691 */
680static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes) 692static int __init split_nodes_interleave(struct numa_meminfo *ei,
693 u64 addr, u64 max_addr, int nr_nodes)
681{ 694{
682 nodemask_t physnode_mask = NODE_MASK_NONE; 695 nodemask_t physnode_mask = NODE_MASK_NONE;
683 u64 size; 696 u64 size;
684 int big; 697 int big;
685 int ret = 0; 698 int nid = 0;
686 int i; 699 int i, ret;
687 700
688 if (nr_nodes <= 0) 701 if (nr_nodes <= 0)
689 return -1; 702 return -1;
@@ -721,7 +734,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
721 u64 end = physnodes[i].start + size; 734 u64 end = physnodes[i].start + size;
722 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); 735 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
723 736
724 if (ret < big) 737 if (nid < big)
725 end += FAKE_NODE_MIN_SIZE; 738 end += FAKE_NODE_MIN_SIZE;
726 739
727 /* 740 /*
@@ -760,16 +773,21 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
760 * happen as a result of rounding down each node's size 773 * happen as a result of rounding down each node's size
761 * to FAKE_NODE_MIN_SIZE. 774 * to FAKE_NODE_MIN_SIZE.
762 */ 775 */
763 if (nodes_weight(physnode_mask) + ret >= nr_nodes) 776 if (nodes_weight(physnode_mask) + nid >= nr_nodes)
764 end = physnodes[i].end; 777 end = physnodes[i].end;
765 778
766 if (setup_node_range(ret++, i, &physnodes[i].start, 779 ret = emu_setup_memblk(ei, nid++, i,
767 end - physnodes[i].start, 780 physnodes[i].start,
768 physnodes[i].end) < 0) 781 min(end, physnodes[i].end));
782 if (ret < 0)
783 return ret;
784
785 physnodes[i].start = min(end, physnodes[i].end);
786 if (physnodes[i].start == physnodes[i].end)
769 node_clear(i, physnode_mask); 787 node_clear(i, physnode_mask);
770 } 788 }
771 } 789 }
772 return ret; 790 return 0;
773} 791}
774 792
775/* 793/*
@@ -794,12 +812,13 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
794 * Sets up fake nodes of `size' interleaved over physical nodes ranging from 812 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
795 * `addr' to `max_addr'. The return value is the number of nodes allocated. 813 * `addr' to `max_addr'. The return value is the number of nodes allocated.
796 */ 814 */
797static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) 815static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
816 u64 addr, u64 max_addr, u64 size)
798{ 817{
799 nodemask_t physnode_mask = NODE_MASK_NONE; 818 nodemask_t physnode_mask = NODE_MASK_NONE;
800 u64 min_size; 819 u64 min_size;
801 int ret = 0; 820 int nid = 0;
802 int i; 821 int i, ret;
803 822
804 if (!size) 823 if (!size)
805 return -1; 824 return -1;
@@ -854,30 +873,31 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
854 memblock_x86_hole_size(end, physnodes[i].end) < size) 873 memblock_x86_hole_size(end, physnodes[i].end) < size)
855 end = physnodes[i].end; 874 end = physnodes[i].end;
856 875
857 /* 876 ret = emu_setup_memblk(ei, nid++, i,
858 * Setup the fake node that will be allocated as bootmem 877 physnodes[i].start,
859 * later. If setup_node_range() returns non-zero, there 878 min(end, physnodes[i].end));
860 * is no more memory available on this physical node. 879 if (ret < 0)
861 */ 880 return ret;
862 if (setup_node_range(ret++, i, &physnodes[i].start, 881
863 end - physnodes[i].start, 882 physnodes[i].start = min(end, physnodes[i].end);
864 physnodes[i].end) < 0) 883 if (physnodes[i].start == physnodes[i].end)
865 node_clear(i, physnode_mask); 884 node_clear(i, physnode_mask);
866 } 885 }
867 } 886 }
868 return ret; 887 return 0;
869} 888}
870 889
871/* 890/*
872 * Sets up the system RAM area from start_pfn to last_pfn according to the 891 * Sets up the system RAM area from start_pfn to last_pfn according to the
873 * numa=fake command-line option. 892 * numa=fake command-line option.
874 */ 893 */
875static int __init numa_emulation(int acpi, int amd) 894static bool __init numa_emulation(int acpi, int amd)
876{ 895{
877 static struct numa_meminfo ei __initdata; 896 static struct numa_meminfo ei __initdata;
878 const u64 max_addr = max_pfn << PAGE_SHIFT; 897 const u64 max_addr = max_pfn << PAGE_SHIFT;
879 int num_nodes; 898 int i, ret;
880 int i; 899
900 memset(&ei, 0, sizeof(ei));
881 901
882 for (i = 0; i < MAX_NUMNODES; i++) 902 for (i = 0; i < MAX_NUMNODES; i++)
883 emu_nid_to_phys[i] = NUMA_NO_NODE; 903 emu_nid_to_phys[i] = NUMA_NO_NODE;
@@ -891,52 +911,33 @@ static int __init numa_emulation(int acpi, int amd)
891 u64 size; 911 u64 size;
892 912
893 size = memparse(emu_cmdline, &emu_cmdline); 913 size = memparse(emu_cmdline, &emu_cmdline);
894 num_nodes = split_nodes_size_interleave(0, max_addr, size); 914 ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
895 } else { 915 } else {
896 unsigned long n; 916 unsigned long n;
897 917
898 n = simple_strtoul(emu_cmdline, NULL, 0); 918 n = simple_strtoul(emu_cmdline, NULL, 0);
899 num_nodes = split_nodes_interleave(0, max_addr, n); 919 ret = split_nodes_interleave(&ei, 0, max_addr, n);
920 }
921
922 if (ret < 0)
923 return false;
924
925 if (numa_cleanup_meminfo(&ei) < 0) {
926 pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
927 return false;
900 } 928 }
901 929
902 if (num_nodes < 0) 930 /* commit */
903 return num_nodes; 931 numa_meminfo = ei;
904 932
905 /* make sure all emulated nodes are mapped to a physical node */ 933 /* make sure all emulated nodes are mapped to a physical node */
906 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) 934 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
907 if (emu_nid_to_phys[i] == NUMA_NO_NODE) 935 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
908 emu_nid_to_phys[i] = 0; 936 emu_nid_to_phys[i] = 0;
909 937
910 ei.nr_blks = num_nodes; 938 fake_physnodes(acpi, amd, &ei);
911 for (i = 0; i < ei.nr_blks; i++) {
912 ei.blk[i].start = nodes[i].start;
913 ei.blk[i].end = nodes[i].end;
914 ei.blk[i].nid = i;
915 }
916
917 memnode_shift = compute_hash_shift(&ei);
918 if (memnode_shift < 0) {
919 memnode_shift = 0;
920 printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
921 "disabled.\n");
922 return -1;
923 }
924
925 /*
926 * We need to vacate all active ranges that may have been registered for
927 * the e820 memory map.
928 */
929 remove_all_active_ranges();
930 for_each_node_mask(i, node_possible_map)
931 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
932 nodes[i].end >> PAGE_SHIFT);
933 init_memory_mapping_high();
934 for_each_node_mask(i, node_possible_map)
935 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
936 fake_physnodes(acpi, amd, num_nodes);
937 numa_init_array();
938 numa_emu_dist = true; 939 numa_emu_dist = true;
939 return 0; 940 return true;
940} 941}
941#endif /* CONFIG_NUMA_EMU */ 942#endif /* CONFIG_NUMA_EMU */
942 943
@@ -988,15 +989,13 @@ void __init initmem_init(void)
988 continue; 989 continue;
989#ifdef CONFIG_NUMA_EMU 990#ifdef CONFIG_NUMA_EMU
990 setup_physnodes(0, max_pfn << PAGE_SHIFT); 991 setup_physnodes(0, max_pfn << PAGE_SHIFT);
991 if (emu_cmdline && !numa_emulation(i == 0, i == 1)) 992 /*
992 return; 993 * If requested, try emulation. If emulation is not used,
993 994 * build identity emu_nid_to_phys[] for numa_add_cpu()
994 /* not emulating, build identity mapping for numa_add_cpu() */ 995 */
995 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) 996 if (!emu_cmdline || !numa_emulation(i == 0, i == 1))
996 emu_nid_to_phys[j] = j; 997 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
997 998 emu_nid_to_phys[j] = j;
998 nodes_clear(node_possible_map);
999 nodes_clear(node_online_map);
1000#endif 999#endif
1001 if (numa_register_memblks(&numa_meminfo) < 0) 1000 if (numa_register_memblks(&numa_meminfo) < 0)
1002 continue; 1001 continue;