aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-02-16 11:11:10 -0500
committerTejun Heo <tj@kernel.org>2011-02-16 11:11:10 -0500
commit1cca53407336fb6a86092e36dbc5c1e4d45d912b (patch)
treed6659b944d1ee5a472a7155753c08e185ba73a79 /arch/x86/mm
parent775ee85d7bff8ce7c7eccde90eda400658b650a3 (diff)
x86-64, NUMA: Emulate directly from numa_meminfo
NUMA emulation built physnodes[] array which could only represent configurations from the physical meminfo and emulated nodes using the information. There's no reason to take this extra level of indirection. Update emulation functions so that they operate directly on numa_meminfo. This simplifies the code and makes emulation layout behave better with interleaved physical nodes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/numa_64.c171
1 files changed, 71 insertions, 100 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dc9516587cf5..bd086ebc0ffc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -541,8 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
541 541
542#ifdef CONFIG_NUMA_EMU 542#ifdef CONFIG_NUMA_EMU
543/* Numa emulation */ 543/* Numa emulation */
544static struct bootnode physnodes[MAX_NUMNODES] __initdata;
545
546static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; 544static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
547static char *emu_cmdline __initdata; 545static char *emu_cmdline __initdata;
548 546
@@ -551,6 +549,16 @@ void __init numa_emu_cmdline(char *str)
551 emu_cmdline = str; 549 emu_cmdline = str;
552} 550}
553 551
552static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
553{
554 int i;
555
556 for (i = 0; i < mi->nr_blks; i++)
557 if (mi->blk[i].nid == nid)
558 return i;
559 return -ENOENT;
560}
561
554int __init find_node_by_addr(unsigned long addr) 562int __init find_node_by_addr(unsigned long addr)
555{ 563{
556 const struct numa_meminfo *mi = &numa_meminfo; 564 const struct numa_meminfo *mi = &numa_meminfo;
@@ -568,63 +576,6 @@ int __init find_node_by_addr(unsigned long addr)
568 return NUMA_NO_NODE; 576 return NUMA_NO_NODE;
569} 577}
570 578
571static int __init setup_physnodes(unsigned long start, unsigned long end)
572{
573 const struct numa_meminfo *mi = &numa_meminfo;
574 int ret = 0;
575 int i;
576
577 memset(physnodes, 0, sizeof(physnodes));
578
579 for (i = 0; i < mi->nr_blks; i++) {
580 int nid = mi->blk[i].nid;
581
582 if (physnodes[nid].start == physnodes[nid].end) {
583 physnodes[nid].start = mi->blk[i].start;
584 physnodes[nid].end = mi->blk[i].end;
585 } else {
586 physnodes[nid].start = min(physnodes[nid].start,
587 mi->blk[i].start);
588 physnodes[nid].end = max(physnodes[nid].end,
589 mi->blk[i].end);
590 }
591 }
592
593 /*
594 * Basic sanity checking on the physical node map: there may be errors
595 * if the SRAT or AMD code incorrectly reported the topology or the mem=
596 * kernel parameter is used.
597 */
598 for (i = 0; i < MAX_NUMNODES; i++) {
599 if (physnodes[i].start == physnodes[i].end)
600 continue;
601 if (physnodes[i].start > end) {
602 physnodes[i].end = physnodes[i].start;
603 continue;
604 }
605 if (physnodes[i].end < start) {
606 physnodes[i].start = physnodes[i].end;
607 continue;
608 }
609 if (physnodes[i].start < start)
610 physnodes[i].start = start;
611 if (physnodes[i].end > end)
612 physnodes[i].end = end;
613 ret++;
614 }
615
616 /*
617 * If no physical topology was detected, a single node is faked to cover
618 * the entire address space.
619 */
620 if (!ret) {
621 physnodes[ret].start = start;
622 physnodes[ret].end = end;
623 ret = 1;
624 }
625 return ret;
626}
627
628static void __init fake_physnodes(int acpi, int amd, 579static void __init fake_physnodes(int acpi, int amd,
629 const struct numa_meminfo *ei) 580 const struct numa_meminfo *ei)
630{ 581{
@@ -663,9 +614,11 @@ static void __init fake_physnodes(int acpi, int amd,
663 * something went wrong, 0 otherwise. 614 * something went wrong, 0 otherwise.
664 */ 615 */
665static int __init emu_setup_memblk(struct numa_meminfo *ei, 616static int __init emu_setup_memblk(struct numa_meminfo *ei,
666 int nid, int physnid, u64 start, u64 end) 617 struct numa_meminfo *pi,
618 int nid, int phys_blk, u64 size)
667{ 619{
668 struct numa_memblk *eb = &ei->blk[ei->nr_blks]; 620 struct numa_memblk *eb = &ei->blk[ei->nr_blks];
621 struct numa_memblk *pb = &pi->blk[phys_blk];
669 622
670 if (ei->nr_blks >= NR_NODE_MEMBLKS) { 623 if (ei->nr_blks >= NR_NODE_MEMBLKS) {
671 pr_err("NUMA: Too many emulated memblks, failing emulation\n"); 624 pr_err("NUMA: Too many emulated memblks, failing emulation\n");
@@ -673,12 +626,18 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
673 } 626 }
674 627
675 ei->nr_blks++; 628 ei->nr_blks++;
676 eb->start = start; 629 eb->start = pb->start;
677 eb->end = end; 630 eb->end = pb->start + size;
678 eb->nid = nid; 631 eb->nid = nid;
679 632
680 if (emu_nid_to_phys[nid] == NUMA_NO_NODE) 633 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
681 emu_nid_to_phys[nid] = physnid; 634 emu_nid_to_phys[nid] = pb->nid;
635
636 pb->start += size;
637 if (pb->start >= pb->end) {
638 WARN_ON_ONCE(pb->start > pb->end);
639 numa_remove_memblk_from(phys_blk, pi);
640 }
682 641
683 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 642 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
684 eb->start, eb->end, (eb->end - eb->start) >> 20); 643 eb->start, eb->end, (eb->end - eb->start) >> 20);
@@ -690,6 +649,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
690 * to max_addr. The return value is the number of nodes allocated. 649 * to max_addr. The return value is the number of nodes allocated.
691 */ 650 */
692static int __init split_nodes_interleave(struct numa_meminfo *ei, 651static int __init split_nodes_interleave(struct numa_meminfo *ei,
652 struct numa_meminfo *pi,
693 u64 addr, u64 max_addr, int nr_nodes) 653 u64 addr, u64 max_addr, int nr_nodes)
694{ 654{
695 nodemask_t physnode_mask = NODE_MASK_NONE; 655 nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -721,9 +681,8 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
721 return -1; 681 return -1;
722 } 682 }
723 683
724 for (i = 0; i < MAX_NUMNODES; i++) 684 for (i = 0; i < pi->nr_blks; i++)
725 if (physnodes[i].start != physnodes[i].end) 685 node_set(pi->blk[i].nid, physnode_mask);
726 node_set(i, physnode_mask);
727 686
728 /* 687 /*
729 * Continue to fill physical nodes with fake nodes until there is no 688 * Continue to fill physical nodes with fake nodes until there is no
@@ -731,8 +690,18 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
731 */ 690 */
732 while (nodes_weight(physnode_mask)) { 691 while (nodes_weight(physnode_mask)) {
733 for_each_node_mask(i, physnode_mask) { 692 for_each_node_mask(i, physnode_mask) {
734 u64 end = physnodes[i].start + size;
735 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); 693 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
694 u64 start, limit, end;
695 int phys_blk;
696
697 phys_blk = emu_find_memblk_by_nid(i, pi);
698 if (phys_blk < 0) {
699 node_clear(i, physnode_mask);
700 continue;
701 }
702 start = pi->blk[phys_blk].start;
703 limit = pi->blk[phys_blk].end;
704 end = start + size;
736 705
737 if (nid < big) 706 if (nid < big)
738 end += FAKE_NODE_MIN_SIZE; 707 end += FAKE_NODE_MIN_SIZE;
@@ -741,11 +710,11 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
741 * Continue to add memory to this fake node if its 710 * Continue to add memory to this fake node if its
742 * non-reserved memory is less than the per-node size. 711 * non-reserved memory is less than the per-node size.
743 */ 712 */
744 while (end - physnodes[i].start - 713 while (end - start -
745 memblock_x86_hole_size(physnodes[i].start, end) < size) { 714 memblock_x86_hole_size(start, end) < size) {
746 end += FAKE_NODE_MIN_SIZE; 715 end += FAKE_NODE_MIN_SIZE;
747 if (end > physnodes[i].end) { 716 if (end > limit) {
748 end = physnodes[i].end; 717 end = limit;
749 break; 718 break;
750 } 719 }
751 } 720 }
@@ -764,19 +733,15 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
764 * next node, this one must extend to the end of the 733 * next node, this one must extend to the end of the
765 * physical node. 734 * physical node.
766 */ 735 */
767 if (physnodes[i].end - end - 736 if (limit - end -
768 memblock_x86_hole_size(end, physnodes[i].end) < size) 737 memblock_x86_hole_size(end, limit) < size)
769 end = physnodes[i].end; 738 end = limit;
770 739
771 ret = emu_setup_memblk(ei, nid++ % nr_nodes, i, 740 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
772 physnodes[i].start, 741 phys_blk,
773 min(end, physnodes[i].end)); 742 min(end, limit) - start);
774 if (ret < 0) 743 if (ret < 0)
775 return ret; 744 return ret;
776
777 physnodes[i].start = min(end, physnodes[i].end);
778 if (physnodes[i].start == physnodes[i].end)
779 node_clear(i, physnode_mask);
780 } 745 }
781 } 746 }
782 return 0; 747 return 0;
@@ -805,6 +770,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
805 * `addr' to `max_addr'. The return value is the number of nodes allocated. 770 * `addr' to `max_addr'. The return value is the number of nodes allocated.
806 */ 771 */
807static int __init split_nodes_size_interleave(struct numa_meminfo *ei, 772static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
773 struct numa_meminfo *pi,
808 u64 addr, u64 max_addr, u64 size) 774 u64 addr, u64 max_addr, u64 size)
809{ 775{
810 nodemask_t physnode_mask = NODE_MASK_NONE; 776 nodemask_t physnode_mask = NODE_MASK_NONE;
@@ -833,9 +799,9 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
833 } 799 }
834 size &= FAKE_NODE_MIN_HASH_MASK; 800 size &= FAKE_NODE_MIN_HASH_MASK;
835 801
836 for (i = 0; i < MAX_NUMNODES; i++) 802 for (i = 0; i < pi->nr_blks; i++)
837 if (physnodes[i].start != physnodes[i].end) 803 node_set(pi->blk[i].nid, physnode_mask);
838 node_set(i, physnode_mask); 804
839 /* 805 /*
840 * Fill physical nodes with fake nodes of size until there is no memory 806 * Fill physical nodes with fake nodes of size until there is no memory
841 * left on any of them. 807 * left on any of them.
@@ -843,10 +809,18 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
843 while (nodes_weight(physnode_mask)) { 809 while (nodes_weight(physnode_mask)) {
844 for_each_node_mask(i, physnode_mask) { 810 for_each_node_mask(i, physnode_mask) {
845 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; 811 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
846 u64 end; 812 u64 start, limit, end;
813 int phys_blk;
847 814
848 end = find_end_of_node(physnodes[i].start, 815 phys_blk = emu_find_memblk_by_nid(i, pi);
849 physnodes[i].end, size); 816 if (phys_blk < 0) {
817 node_clear(i, physnode_mask);
818 continue;
819 }
820 start = pi->blk[phys_blk].start;
821 limit = pi->blk[phys_blk].end;
822
823 end = find_end_of_node(start, limit, size);
850 /* 824 /*
851 * If there won't be at least FAKE_NODE_MIN_SIZE of 825 * If there won't be at least FAKE_NODE_MIN_SIZE of
852 * non-reserved memory in ZONE_DMA32 for the next node, 826 * non-reserved memory in ZONE_DMA32 for the next node,
@@ -861,19 +835,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
861 * next node, this one must extend to the end of the 835 * next node, this one must extend to the end of the
862 * physical node. 836 * physical node.
863 */ 837 */
864 if (physnodes[i].end - end - 838 if (limit - end -
865 memblock_x86_hole_size(end, physnodes[i].end) < size) 839 memblock_x86_hole_size(end, limit) < size)
866 end = physnodes[i].end; 840 end = limit;
867 841
868 ret = emu_setup_memblk(ei, nid++ % MAX_NUMNODES, i, 842 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
869 physnodes[i].start, 843 phys_blk,
870 min(end, physnodes[i].end)); 844 min(end, limit) - start);
871 if (ret < 0) 845 if (ret < 0)
872 return ret; 846 return ret;
873
874 physnodes[i].start = min(end, physnodes[i].end);
875 if (physnodes[i].start == physnodes[i].end)
876 node_clear(i, physnode_mask);
877 } 847 }
878 } 848 }
879 return 0; 849 return 0;
@@ -886,10 +856,12 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
886static bool __init numa_emulation(int acpi, int amd) 856static bool __init numa_emulation(int acpi, int amd)
887{ 857{
888 static struct numa_meminfo ei __initdata; 858 static struct numa_meminfo ei __initdata;
859 static struct numa_meminfo pi __initdata;
889 const u64 max_addr = max_pfn << PAGE_SHIFT; 860 const u64 max_addr = max_pfn << PAGE_SHIFT;
890 int i, ret; 861 int i, ret;
891 862
892 memset(&ei, 0, sizeof(ei)); 863 memset(&ei, 0, sizeof(ei));
864 pi = numa_meminfo;
893 865
894 for (i = 0; i < MAX_NUMNODES; i++) 866 for (i = 0; i < MAX_NUMNODES; i++)
895 emu_nid_to_phys[i] = NUMA_NO_NODE; 867 emu_nid_to_phys[i] = NUMA_NO_NODE;
@@ -903,12 +875,12 @@ static bool __init numa_emulation(int acpi, int amd)
903 u64 size; 875 u64 size;
904 876
905 size = memparse(emu_cmdline, &emu_cmdline); 877 size = memparse(emu_cmdline, &emu_cmdline);
906 ret = split_nodes_size_interleave(&ei, 0, max_addr, size); 878 ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
907 } else { 879 } else {
908 unsigned long n; 880 unsigned long n;
909 881
910 n = simple_strtoul(emu_cmdline, NULL, 0); 882 n = simple_strtoul(emu_cmdline, NULL, 0);
911 ret = split_nodes_interleave(&ei, 0, max_addr, n); 883 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
912 } 884 }
913 885
914 if (ret < 0) 886 if (ret < 0)
@@ -980,7 +952,6 @@ void __init initmem_init(void)
980 if (numa_cleanup_meminfo(&numa_meminfo) < 0) 952 if (numa_cleanup_meminfo(&numa_meminfo) < 0)
981 continue; 953 continue;
982#ifdef CONFIG_NUMA_EMU 954#ifdef CONFIG_NUMA_EMU
983 setup_physnodes(0, max_pfn << PAGE_SHIFT);
984 /* 955 /*
985 * If requested, try emulation. If emulation is not used, 956 * If requested, try emulation. If emulation is not used,
986 * build identity emu_nid_to_phys[] for numa_add_cpu() 957 * build identity emu_nid_to_phys[] for numa_add_cpu()