 arch/x86/mm/numa_64.c | 171 ++++++++++++++++++-----------------------------
 1 file changed, 71 insertions(+), 100 deletions(-)
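This patch drops the intermediate physnodes[] bootnode array from the x86-64 NUMA emulation code. Instead of flattening each physical node into a single start/end range, the emulation path now works on a scratch copy of numa_meminfo and carves fake nodes directly out of its memory blocks, so physical nodes made of several discontiguous blocks are represented faithfully. The structures involved look roughly like this (a sketch with userspace stand-in types; the real definitions live in the kernel's x86 NUMA headers, and the array bound shown is a placeholder for NR_NODE_MEMBLKS):

#include <stdint.h>

typedef uint64_t u64;           /* userspace stand-in for the kernel type */

/* One contiguous physical memory range belonging to one NUMA node. */
struct numa_memblk {
        u64 start;
        u64 end;
        int nid;
};

/* The flat block table the emulation code now consumes directly. */
struct numa_meminfo {
        int nr_blks;
        struct numa_memblk blk[16];     /* kernel uses NR_NODE_MEMBLKS */
};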
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dc9516587cf5..bd086ebc0ffc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -541,8 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
-
 static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
 static char *emu_cmdline __initdata;
 
@@ -551,6 +549,16 @@ void __init numa_emu_cmdline(char *str)
 	emu_cmdline = str;
 }
 
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].nid == nid)
+			return i;
+	return -ENOENT;
+}
+
 int __init find_node_by_addr(unsigned long addr)
 {
 	const struct numa_meminfo *mi = &numa_meminfo;
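The new emu_find_memblk_by_nid() returns the index of the first remaining block belonging to a physical node, or -ENOENT once that node has no memory left. Because the split loops further down consume blocks from the front and retire empty ones, repeated lookups naturally walk through every block of a multi-block node. A minimal, self-contained userspace sketch of those semantics (the types, block values, and ENOENT plumbing are illustrative assumptions):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct numa_memblk { u64 start, end; int nid; };
struct numa_meminfo { int nr_blks; struct numa_memblk blk[8]; };

static int emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
{
        for (int i = 0; i < mi->nr_blks; i++)
                if (mi->blk[i].nid == nid)
                        return i;       /* first matching block wins */
        return -ENOENT;
}

int main(void)
{
        /* physical node 0 owns two discontiguous blocks, node 1 owns one */
        struct numa_meminfo mi = { 3, {
                { 0x00000000, 0x40000000, 0 },
                { 0x40000000, 0x80000000, 1 },
                { 0x80000000, 0xc0000000, 0 },
        } };

        printf("nid 0 -> blk %d\n", emu_find_memblk_by_nid(0, &mi)); /* 0 */
        printf("nid 1 -> blk %d\n", emu_find_memblk_by_nid(1, &mi)); /* 1 */
        printf("nid 2 -> blk %d\n", emu_find_memblk_by_nid(2, &mi)); /* -ENOENT */
        return 0;
}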
@@ -568,63 +576,6 @@ int __init find_node_by_addr(unsigned long addr)
 	return NUMA_NO_NODE;
 }
 
-static int __init setup_physnodes(unsigned long start, unsigned long end)
-{
-	const struct numa_meminfo *mi = &numa_meminfo;
-	int ret = 0;
-	int i;
-
-	memset(physnodes, 0, sizeof(physnodes));
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		int nid = mi->blk[i].nid;
-
-		if (physnodes[nid].start == physnodes[nid].end) {
-			physnodes[nid].start = mi->blk[i].start;
-			physnodes[nid].end = mi->blk[i].end;
-		} else {
-			physnodes[nid].start = min(physnodes[nid].start,
-						   mi->blk[i].start);
-			physnodes[nid].end = max(physnodes[nid].end,
-						 mi->blk[i].end);
-		}
-	}
-
-	/*
-	 * Basic sanity checking on the physical node map: there may be errors
-	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
-	 * kernel parameter is used.
-	 */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (physnodes[i].start == physnodes[i].end)
-			continue;
-		if (physnodes[i].start > end) {
-			physnodes[i].end = physnodes[i].start;
-			continue;
-		}
-		if (physnodes[i].end < start) {
-			physnodes[i].start = physnodes[i].end;
-			continue;
-		}
-		if (physnodes[i].start < start)
-			physnodes[i].start = start;
-		if (physnodes[i].end > end)
-			physnodes[i].end = end;
-		ret++;
-	}
-
-	/*
-	 * If no physical topology was detected, a single node is faked to cover
-	 * the entire address space.
-	 */
-	if (!ret) {
-		physnodes[ret].start = start;
-		physnodes[ret].end = end;
-		ret = 1;
-	}
-	return ret;
-}
-
 static void __init fake_physnodes(int acpi, int amd,
 				  const struct numa_meminfo *ei)
 {
@@ -663,9 +614,11 @@ static void __init fake_physnodes(int acpi, int amd,
  * something went wrong, 0 otherwise.
  */
 static int __init emu_setup_memblk(struct numa_meminfo *ei,
-				   int nid, int physnid, u64 start, u64 end)
+				   struct numa_meminfo *pi,
+				   int nid, int phys_blk, u64 size)
 {
 	struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+	struct numa_memblk *pb = &pi->blk[phys_blk];
 
 	if (ei->nr_blks >= NR_NODE_MEMBLKS) {
 		pr_err("NUMA: Too many emulated memblks, failing emulation\n");
@@ -673,12 +626,18 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
 	}
 
 	ei->nr_blks++;
-	eb->start = start;
-	eb->end = end;
+	eb->start = pb->start;
+	eb->end = pb->start + size;
 	eb->nid = nid;
 
 	if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
-		emu_nid_to_phys[nid] = physnid;
+		emu_nid_to_phys[nid] = pb->nid;
+
+	pb->start += size;
+	if (pb->start >= pb->end) {
+		WARN_ON_ONCE(pb->start > pb->end);
+		numa_remove_memblk_from(phys_blk, pi);
+	}
 
 	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
 	       eb->start, eb->end, (eb->end - eb->start) >> 20);
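The carve-and-retire bookkeeping added here is the heart of the rework: emu_setup_memblk() now takes a physical block index plus a size instead of an explicit range, claims size bytes off the front of that block, and removes the block from the scratch meminfo once it is exhausted (the WARN_ON_ONCE flags a caller that asked for more than the block held). A self-contained userspace model of just that bookkeeping; numa_remove_memblk_from() is assumed, from its name and use here, to close the gap by shifting the array:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct numa_memblk { u64 start, end; int nid; };
struct numa_meminfo { int nr_blks; struct numa_memblk blk[8]; };

/* Assumed shape of the kernel helper: drop blk[idx] and close the gap. */
static void numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
{
        mi->nr_blks--;
        for (; idx < mi->nr_blks; idx++)
                mi->blk[idx] = mi->blk[idx + 1];
}

/* The same carve-and-retire step emu_setup_memblk() performs above. */
static void consume(struct numa_meminfo *pi, int phys_blk, u64 size)
{
        struct numa_memblk *pb = &pi->blk[phys_blk];

        printf("fake node at %#llx-%#llx (phys nid %d)\n",
               (unsigned long long)pb->start,
               (unsigned long long)(pb->start + size), pb->nid);

        pb->start += size;              /* claim 'size' bytes off the front */
        if (pb->start >= pb->end)       /* block exhausted: retire it */
                numa_remove_memblk_from(phys_blk, pi);
}

int main(void)
{
        /* one 768MB physical block, carved into three 256MB fake nodes */
        struct numa_meminfo pi = { 1, { { 0, 3ULL << 28, 0 } } };

        while (pi.nr_blks)
                consume(&pi, 0, 1ULL << 28);
        return 0;
}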
@@ -690,6 +649,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
  * to max_addr. The return value is the number of nodes allocated.
  */
 static int __init split_nodes_interleave(struct numa_meminfo *ei,
+					 struct numa_meminfo *pi,
 					 u64 addr, u64 max_addr, int nr_nodes)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -721,9 +681,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		return -1;
 	}
 
-	for (i = 0; i < MAX_NUMNODES; i++)
-		if (physnodes[i].start != physnodes[i].end)
-			node_set(i, physnode_mask);
+	for (i = 0; i < pi->nr_blks; i++)
+		node_set(pi->blk[i].nid, physnode_mask);
 
 	/*
 	 * Continue to fill physical nodes with fake nodes until there is no
@@ -731,8 +690,18 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 end = physnodes[i].start + size;
 			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+			u64 start, limit, end;
+			int phys_blk;
+
+			phys_blk = emu_find_memblk_by_nid(i, pi);
+			if (phys_blk < 0) {
+				node_clear(i, physnode_mask);
+				continue;
+			}
+			start = pi->blk[phys_blk].start;
+			limit = pi->blk[phys_blk].end;
+			end = start + size;
 
 			if (nid < big)
 				end += FAKE_NODE_MIN_SIZE;
@@ -741,11 +710,11 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * Continue to add memory to this fake node if its
 			 * non-reserved memory is less than the per-node size.
 			 */
-			while (end - physnodes[i].start -
-				memblock_x86_hole_size(physnodes[i].start, end) < size) {
+			while (end - start -
+				memblock_x86_hole_size(start, end) < size) {
 				end += FAKE_NODE_MIN_SIZE;
-				if (end > physnodes[i].end) {
-					end = physnodes[i].end;
+				if (end > limit) {
+					end = limit;
 					break;
 				}
 			}
@@ -764,19 +733,15 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (physnodes[i].end - end -
-			    memblock_x86_hole_size(end, physnodes[i].end) < size)
-				end = physnodes[i].end;
+			if (limit - end -
+			    memblock_x86_hole_size(end, limit) < size)
+				end = limit;
 
-			ret = emu_setup_memblk(ei, nid++ % nr_nodes, i,
-					       physnodes[i].start,
-					       min(end, physnodes[i].end));
+			ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+					       phys_blk,
+					       min(end, limit) - start);
 			if (ret < 0)
 				return ret;
-
-			physnodes[i].start = min(end, physnodes[i].end);
-			if (physnodes[i].start == physnodes[i].end)
-				node_clear(i, physnode_mask);
 		}
 	}
 	return 0;
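With physnodes[] gone, the interleave loop's termination condition also changes: instead of shrinking a per-node range and clearing the node when start meets end, the loop now drops a node from the mask as soon as emu_find_memblk_by_nid() finds no block left for it. The nid++ % nr_nodes round-robin still spreads consecutive fake node IDs across physical nodes. A userspace simulation of the resulting layout, ignoring memory holes and the DMA32 special case (block addresses and sizes are made up for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct numa_memblk { u64 start, end; int nid; };
struct numa_meminfo { int nr_blks; struct numa_memblk blk[8]; };

static int find_blk(int nid, const struct numa_meminfo *mi)
{
        for (int i = 0; i < mi->nr_blks; i++)
                if (mi->blk[i].nid == nid)
                        return i;
        return -1;
}

static void remove_blk(int idx, struct numa_meminfo *mi)
{
        mi->nr_blks--;
        for (; idx < mi->nr_blks; idx++)
                mi->blk[idx] = mi->blk[idx + 1];
}

int main(void)
{
        /* two 1GB physical nodes split into four 512MB fake nodes */
        struct numa_meminfo pi = { 2, {
                { 0x00000000, 0x40000000, 0 },
                { 0x40000000, 0x80000000, 1 },
        } };
        const u64 size = 0x20000000;
        const int nr_nodes = 4;
        int nid = 0;

        while (pi.nr_blks) {
                for (int i = 0; i < 2; i++) {   /* round-robin over phys nids */
                        int blk = find_blk(i, &pi);
                        u64 start, end;

                        if (blk < 0)
                                continue;       /* this node is used up */
                        start = pi.blk[blk].start;
                        end = start + size;
                        if (end > pi.blk[blk].end)
                                end = pi.blk[blk].end;

                        printf("fake nid %d <- %#llx-%#llx (phys nid %d)\n",
                               nid++ % nr_nodes, (unsigned long long)start,
                               (unsigned long long)end, i);

                        pi.blk[blk].start = end;        /* consume */
                        if (pi.blk[blk].start >= pi.blk[blk].end)
                                remove_blk(blk, &pi);
                }
        }
        return 0;
}

Running this prints fake nids 0..3 alternating between the two physical nodes, which is exactly the interleaved placement the function's name promises.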
@@ -805,6 +770,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 * 'addr' to 'max_addr'. The return value is the number of nodes allocated.
 */
 static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+					      struct numa_meminfo *pi,
 					      u64 addr, u64 max_addr, u64 size)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -833,9 +799,9 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	}
 	size &= FAKE_NODE_MIN_HASH_MASK;
 
-	for (i = 0; i < MAX_NUMNODES; i++)
-		if (physnodes[i].start != physnodes[i].end)
-			node_set(i, physnode_mask);
+	for (i = 0; i < pi->nr_blks; i++)
+		node_set(pi->blk[i].nid, physnode_mask);
+
 	/*
 	 * Fill physical nodes with fake nodes of size until there is no memory
 	 * left on any of them.
@@ -843,10 +809,18 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
 			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
-			u64 end;
+			u64 start, limit, end;
+			int phys_blk;
 
-			end = find_end_of_node(physnodes[i].start,
-					       physnodes[i].end, size);
+			phys_blk = emu_find_memblk_by_nid(i, pi);
+			if (phys_blk < 0) {
+				node_clear(i, physnode_mask);
+				continue;
+			}
+			start = pi->blk[phys_blk].start;
+			limit = pi->blk[phys_blk].end;
+
+			end = find_end_of_node(start, limit, size);
 			/*
 			 * If there won't be at least FAKE_NODE_MIN_SIZE of
 			 * non-reserved memory in ZONE_DMA32 for the next node,
@@ -861,19 +835,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (physnodes[i].end - end -
-			    memblock_x86_hole_size(end, physnodes[i].end) < size)
-				end = physnodes[i].end;
+			if (limit - end -
+			    memblock_x86_hole_size(end, limit) < size)
+				end = limit;
 
-			ret = emu_setup_memblk(ei, nid++ % MAX_NUMNODES, i,
-					       physnodes[i].start,
-					       min(end, physnodes[i].end));
+			ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+					       phys_blk,
+					       min(end, limit) - start);
 			if (ret < 0)
 				return ret;
-
-			physnodes[i].start = min(end, physnodes[i].end);
-			if (physnodes[i].start == physnodes[i].end)
-				node_clear(i, physnode_mask);
 		}
 	}
 	return 0;
@@ -886,10 +856,12 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 static bool __init numa_emulation(int acpi, int amd)
 {
 	static struct numa_meminfo ei __initdata;
+	static struct numa_meminfo pi __initdata;
 	const u64 max_addr = max_pfn << PAGE_SHIFT;
 	int i, ret;
 
 	memset(&ei, 0, sizeof(ei));
+	pi = numa_meminfo;
 
 	for (i = 0; i < MAX_NUMNODES; i++)
 		emu_nid_to_phys[i] = NUMA_NO_NODE;
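numa_emulation() keeps a static scratch copy rather than a pointer: pi = numa_meminfo is a plain struct assignment, so the embedded block array is duplicated and the split functions can consume pi destructively while the real numa_meminfo stays intact for the eventual registration. A tiny standalone illustration of that copy semantics (the struct here is a stand-in, not the kernel's):

#include <stdio.h>

struct meminfo { int nr_blks; int blk[4]; };

int main(void)
{
        struct meminfo real = { 2, { 10, 20 } };
        struct meminfo scratch = real;  /* struct assignment copies blk[] too */

        scratch.nr_blks = 0;            /* "consume" the copy... */
        scratch.blk[0] = 0;
        printf("%d %d\n", real.nr_blks, real.blk[0]);   /* 2 10: original intact */
        return 0;
}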
@@ -903,12 +875,12 @@ static bool __init numa_emulation(int acpi, int amd)
 		u64 size;
 
 		size = memparse(emu_cmdline, &emu_cmdline);
-		ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
+		ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
 	} else {
 		unsigned long n;
 
 		n = simple_strtoul(emu_cmdline, NULL, 0);
-		ret = split_nodes_interleave(&ei, 0, max_addr, n);
+		ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
 	}
 
 	if (ret < 0)
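Both callers now pass the scratch meminfo through. The two branches correspond to the two numa=fake= forms: a value with a size suffix goes through memparse() into split_nodes_size_interleave() (one fake node per size chunk), while a bare count goes through simple_strtoul() into split_nodes_interleave() (that many fake nodes total); the condition selecting between them sits outside this hunk. A rough userspace approximation of memparse()'s suffix handling, for illustration only (the kernel helper also understands larger suffixes):

#include <stdio.h>
#include <stdlib.h>

/* Rough stand-in for the kernel's memparse(): a number with an
 * optional binary K/M/G suffix, advancing *retptr past what was used.
 */
static unsigned long long memparse_approx(const char *s, char **retptr)
{
        unsigned long long val = strtoull(s, retptr, 0);

        switch (**retptr) {
        case 'G': case 'g': val <<= 10; /* fall through */
        case 'M': case 'm': val <<= 10; /* fall through */
        case 'K': case 'k': val <<= 10; (*retptr)++;
        }
        return val;
}

int main(void)
{
        char *rest;

        /* numa=fake=128M -> 128MB fake nodes via split_nodes_size_interleave() */
        printf("%llu\n", memparse_approx("128M", &rest));       /* 134217728 */
        /* numa=fake=8 -> eight fake nodes via split_nodes_interleave() */
        printf("%llu\n", memparse_approx("8", &rest));          /* 8 */
        return 0;
}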
@@ -980,7 +952,6 @@ void __init initmem_init(void)
 		if (numa_cleanup_meminfo(&numa_meminfo) < 0)
 			continue;
 #ifdef CONFIG_NUMA_EMU
-		setup_physnodes(0, max_pfn << PAGE_SHIFT);
 		/*
 		 * If requested, try emulation. If emulation is not used,
 		 * build identity emu_nid_to_phys[] for numa_add_cpu()