-rw-r--r--	arch/x86/mm/numa_64.c	171
1 file changed, 71 insertions(+), 100 deletions(-)
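
Notes: this patch removes the intermediate physnodes[] bootnode array from the x86-64 NUMA emulation code. Instead of collapsing numa_meminfo into per-node start/end ranges via setup_physnodes(), the emulation path now works on a scratch copy of numa_meminfo (pi): emu_find_memblk_by_nid() finds the next remaining physical memblk for a given node, emu_setup_memblk() carves the requested size off the front of that memblk, and a memblk whose start catches up with its end is retired with numa_remove_memblk_from(). split_nodes_interleave() and split_nodes_size_interleave() take the copy as a new pi argument, and the setup_physnodes() call in initmem_init() goes away.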
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dc9516587cf5..bd086ebc0ffc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -541,8 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
-
 static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
 static char *emu_cmdline __initdata;
 
@@ -551,6 +549,16 @@ void __init numa_emu_cmdline(char *str)
 	emu_cmdline = str;
 }
 
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].nid == nid)
+			return i;
+	return -ENOENT;
+}
+
 int __init find_node_by_addr(unsigned long addr)
 {
 	const struct numa_meminfo *mi = &numa_meminfo;
@@ -568,63 +576,6 @@ int __init find_node_by_addr(unsigned long addr)
 	return NUMA_NO_NODE;
 }
 
-static int __init setup_physnodes(unsigned long start, unsigned long end)
-{
-	const struct numa_meminfo *mi = &numa_meminfo;
-	int ret = 0;
-	int i;
-
-	memset(physnodes, 0, sizeof(physnodes));
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		int nid = mi->blk[i].nid;
-
-		if (physnodes[nid].start == physnodes[nid].end) {
-			physnodes[nid].start = mi->blk[i].start;
-			physnodes[nid].end = mi->blk[i].end;
-		} else {
-			physnodes[nid].start = min(physnodes[nid].start,
-						   mi->blk[i].start);
-			physnodes[nid].end = max(physnodes[nid].end,
-						 mi->blk[i].end);
-		}
-	}
-
-	/*
-	 * Basic sanity checking on the physical node map: there may be errors
-	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
-	 * kernel parameter is used.
-	 */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (physnodes[i].start == physnodes[i].end)
-			continue;
-		if (physnodes[i].start > end) {
-			physnodes[i].end = physnodes[i].start;
-			continue;
-		}
-		if (physnodes[i].end < start) {
-			physnodes[i].start = physnodes[i].end;
-			continue;
-		}
-		if (physnodes[i].start < start)
-			physnodes[i].start = start;
-		if (physnodes[i].end > end)
-			physnodes[i].end = end;
-		ret++;
-	}
-
-	/*
-	 * If no physical topology was detected, a single node is faked to cover
-	 * the entire address space.
-	 */
-	if (!ret) {
-		physnodes[ret].start = start;
-		physnodes[ret].end = end;
-		ret = 1;
-	}
-	return ret;
-}
-
 static void __init fake_physnodes(int acpi, int amd,
 				  const struct numa_meminfo *ei)
 {
@@ -663,9 +614,11 @@ static void __init fake_physnodes(int acpi, int amd,
  * something went wrong, 0 otherwise.
  */
 static int __init emu_setup_memblk(struct numa_meminfo *ei,
-				   int nid, int physnid, u64 start, u64 end)
+				   struct numa_meminfo *pi,
+				   int nid, int phys_blk, u64 size)
 {
 	struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+	struct numa_memblk *pb = &pi->blk[phys_blk];
 
 	if (ei->nr_blks >= NR_NODE_MEMBLKS) {
 		pr_err("NUMA: Too many emulated memblks, failing emulation\n");
@@ -673,12 +626,18 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
 	}
 
 	ei->nr_blks++;
-	eb->start = start;
-	eb->end = end;
+	eb->start = pb->start;
+	eb->end = pb->start + size;
 	eb->nid = nid;
 
 	if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
-		emu_nid_to_phys[nid] = physnid;
+		emu_nid_to_phys[nid] = pb->nid;
+
+	pb->start += size;
+	if (pb->start >= pb->end) {
+		WARN_ON_ONCE(pb->start > pb->end);
+		numa_remove_memblk_from(phys_blk, pi);
+	}
 
 	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
 	       eb->start, eb->end, (eb->end - eb->start) >> 20);
@@ -690,6 +649,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
  * to max_addr. The return value is the number of nodes allocated.
  */
 static int __init split_nodes_interleave(struct numa_meminfo *ei,
+					 struct numa_meminfo *pi,
 					 u64 addr, u64 max_addr, int nr_nodes)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -721,9 +681,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		return -1;
 	}
 
-	for (i = 0; i < MAX_NUMNODES; i++)
-		if (physnodes[i].start != physnodes[i].end)
-			node_set(i, physnode_mask);
+	for (i = 0; i < pi->nr_blks; i++)
+		node_set(pi->blk[i].nid, physnode_mask);
 
 	/*
 	 * Continue to fill physical nodes with fake nodes until there is no
@@ -731,8 +690,18 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 end = physnodes[i].start + size;
 			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+			u64 start, limit, end;
+			int phys_blk;
+
+			phys_blk = emu_find_memblk_by_nid(i, pi);
+			if (phys_blk < 0) {
+				node_clear(i, physnode_mask);
+				continue;
+			}
+			start = pi->blk[phys_blk].start;
+			limit = pi->blk[phys_blk].end;
+			end = start + size;
 
 			if (nid < big)
 				end += FAKE_NODE_MIN_SIZE;
@@ -741,11 +710,11 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * Continue to add memory to this fake node if its
 			 * non-reserved memory is less than the per-node size.
 			 */
-			while (end - physnodes[i].start -
-				memblock_x86_hole_size(physnodes[i].start, end) < size) {
+			while (end - start -
+			       memblock_x86_hole_size(start, end) < size) {
 				end += FAKE_NODE_MIN_SIZE;
-				if (end > physnodes[i].end) {
-					end = physnodes[i].end;
+				if (end > limit) {
+					end = limit;
 					break;
 				}
 			}
@@ -764,19 +733,15 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (physnodes[i].end - end -
-			    memblock_x86_hole_size(end, physnodes[i].end) < size)
-				end = physnodes[i].end;
+			if (limit - end -
+			    memblock_x86_hole_size(end, limit) < size)
+				end = limit;
 
-			ret = emu_setup_memblk(ei, nid++ % nr_nodes, i,
-					       physnodes[i].start,
-					       min(end, physnodes[i].end));
+			ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+					       phys_blk,
+					       min(end, limit) - start);
 			if (ret < 0)
 				return ret;
-
-			physnodes[i].start = min(end, physnodes[i].end);
-			if (physnodes[i].start == physnodes[i].end)
-				node_clear(i, physnode_mask);
 		}
 	}
 	return 0;
@@ -805,6 +770,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 * `addr' to `max_addr'. The return value is the number of nodes allocated.
 */
 static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+					      struct numa_meminfo *pi,
 					      u64 addr, u64 max_addr, u64 size)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -833,9 +799,9 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	}
 	size &= FAKE_NODE_MIN_HASH_MASK;
 
-	for (i = 0; i < MAX_NUMNODES; i++)
-		if (physnodes[i].start != physnodes[i].end)
-			node_set(i, physnode_mask);
+	for (i = 0; i < pi->nr_blks; i++)
+		node_set(pi->blk[i].nid, physnode_mask);
+
 	/*
 	 * Fill physical nodes with fake nodes of size until there is no memory
 	 * left on any of them.
@@ -843,10 +809,18 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
 			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
-			u64 end;
+			u64 start, limit, end;
+			int phys_blk;
 
-			end = find_end_of_node(physnodes[i].start,
-					       physnodes[i].end, size);
+			phys_blk = emu_find_memblk_by_nid(i, pi);
+			if (phys_blk < 0) {
+				node_clear(i, physnode_mask);
+				continue;
+			}
+			start = pi->blk[phys_blk].start;
+			limit = pi->blk[phys_blk].end;
+
+			end = find_end_of_node(start, limit, size);
 			/*
 			 * If there won't be at least FAKE_NODE_MIN_SIZE of
 			 * non-reserved memory in ZONE_DMA32 for the next node,
@@ -861,19 +835,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (physnodes[i].end - end -
-			    memblock_x86_hole_size(end, physnodes[i].end) < size)
-				end = physnodes[i].end;
+			if (limit - end -
+			    memblock_x86_hole_size(end, limit) < size)
+				end = limit;
 
-			ret = emu_setup_memblk(ei, nid++ % MAX_NUMNODES, i,
-					       physnodes[i].start,
-					       min(end, physnodes[i].end));
+			ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+					       phys_blk,
+					       min(end, limit) - start);
 			if (ret < 0)
 				return ret;
-
-			physnodes[i].start = min(end, physnodes[i].end);
-			if (physnodes[i].start == physnodes[i].end)
-				node_clear(i, physnode_mask);
 		}
 	}
 	return 0;
@@ -886,10 +856,12 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 static bool __init numa_emulation(int acpi, int amd)
 {
 	static struct numa_meminfo ei __initdata;
+	static struct numa_meminfo pi __initdata;
 	const u64 max_addr = max_pfn << PAGE_SHIFT;
 	int i, ret;
 
 	memset(&ei, 0, sizeof(ei));
+	pi = numa_meminfo;
 
 	for (i = 0; i < MAX_NUMNODES; i++)
 		emu_nid_to_phys[i] = NUMA_NO_NODE;
@@ -903,12 +875,12 @@ static bool __init numa_emulation(int acpi, int amd)
 		u64 size;
 
 		size = memparse(emu_cmdline, &emu_cmdline);
-		ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
+		ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
 	} else {
 		unsigned long n;
 
 		n = simple_strtoul(emu_cmdline, NULL, 0);
-		ret = split_nodes_interleave(&ei, 0, max_addr, n);
+		ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
 	}
 
 	if (ret < 0)
@@ -980,7 +952,6 @@ void __init initmem_init(void)
 		if (numa_cleanup_meminfo(&numa_meminfo) < 0)
 			continue;
 #ifdef CONFIG_NUMA_EMU
-		setup_physnodes(0, max_pfn << PAGE_SHIFT);
 		/*
 		 * If requested, try emulation. If emulation is not used,
 		 * build identity emu_nid_to_phys[] for numa_add_cpu()
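
To see the new consume-from-the-front scheme in isolation, here is a minimal userspace sketch (not kernel code; the struct layout, helper names, block addresses, and main() harness are illustrative assumptions). It models only the memblk bookkeeping and deliberately omits the hole accounting (memblock_x86_hole_size()) and the DMA32/FAKE_NODE_MIN_SIZE adjustments that the real split_nodes_*_interleave() loops perform:

/*
 * Userspace model of the emulation scheme above (illustrative only):
 * a scratch copy of the physical meminfo is consumed from the front,
 * one fake node at a time. The block layout and sizes in main() are
 * made-up assumptions, not real firmware data.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

typedef uint64_t u64;
#define NR_BLKS 16

struct memblk { u64 start, end; int nid; };
struct meminfo { int nr_blks; struct memblk blk[NR_BLKS]; };

/* Mirrors emu_find_memblk_by_nid(): first remaining blk of a physical node. */
static int find_memblk_by_nid(int nid, const struct meminfo *mi)
{
	for (int i = 0; i < mi->nr_blks; i++)
		if (mi->blk[i].nid == nid)
			return i;
	return -ENOENT;
}

/* Mirrors numa_remove_memblk_from(): delete blk idx, compacting the array. */
static void remove_memblk_from(int idx, struct meminfo *mi)
{
	mi->nr_blks--;
	memmove(&mi->blk[idx], &mi->blk[idx + 1],
		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
}

/*
 * Mirrors the new emu_setup_memblk(): carve `size` bytes off the front
 * of physical blk `phys_blk` and assign them to emulated node `nid`.
 */
static int setup_memblk(struct meminfo *ei, struct meminfo *pi,
			int nid, int phys_blk, u64 size)
{
	struct memblk *eb = &ei->blk[ei->nr_blks];
	struct memblk *pb = &pi->blk[phys_blk];

	if (ei->nr_blks >= NR_BLKS)
		return -EINVAL;

	ei->nr_blks++;
	eb->start = pb->start;
	eb->end = pb->start + size;
	eb->nid = nid;

	pb->start += size;              /* consume from the front */
	if (pb->start >= pb->end)       /* block exhausted: drop it */
		remove_memblk_from(phys_blk, pi);

	printf("Faking node %d at %#llx-%#llx\n", nid,
	       (unsigned long long)eb->start, (unsigned long long)eb->end);
	return 0;
}

int main(void)
{
	/* Hypothetical physical layout: one node, two discontiguous blocks. */
	struct meminfo pi = { 2, { { 0x0, 0x4000, 0 }, { 0x8000, 0xc000, 0 } } };
	struct meminfo ei = { 0 };
	int nid = 0, blk;

	/* Split node 0 into 0x2000-byte fake nodes until it is exhausted. */
	while ((blk = find_memblk_by_nid(0, &pi)) >= 0)
		setup_memblk(&ei, &pi, nid++, blk, 0x2000);
	return 0;
}

With the hypothetical two-block layout above, the loop produces four 0x2000-byte fake nodes and retires each physical block once its start reaches its end, mirroring how the patched emu_setup_memblk() drops exhausted blocks via numa_remove_memblk_from().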