Diffstat (limited to 'arch/sparc64/mm/init.c')

 arch/sparc64/mm/init.c | 989 +++++++++++++++++++++++++++++-----------
 1 file changed, 713 insertions(+), 276 deletions(-)
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index f37078d96407..177d8aaeec42 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -24,6 +24,8 @@
 #include <linux/cache.h>
 #include <linux/sort.h>
 #include <linux/percpu.h>
+#include <linux/lmb.h>
+#include <linux/mmzone.h>
 
 #include <asm/head.h>
 #include <asm/system.h>
@@ -72,9 +74,7 @@ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #define MAX_BANKS	32
 
 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
 static int pavail_ents __initdata;
-static int pavail_rescan_ents __initdata;
 
 static int cmp_p64(const void *a, const void *b)
 {
@@ -715,285 +715,684 @@ out:
 	smp_new_mmu_context_version();
 }
 
-/* Find a free area for the bootmem map, avoiding the kernel image
- * and the initial ramdisk.
- */
-static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
-					       unsigned long end_pfn)
+static int numa_enabled = 1;
+static int numa_debug;
+
+static int __init early_numa(char *p)
 {
-	unsigned long avoid_start, avoid_end, bootmap_size;
-	int i;
+	if (!p)
+		return 0;
+
+	if (strstr(p, "off"))
+		numa_enabled = 0;
+
+	if (strstr(p, "debug"))
+		numa_debug = 1;
+
+	return 0;
+}
+early_param("numa", early_numa);
 
-	bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmap_size <<= PAGE_SHIFT;
+#define numadbg(f, a...) \
+do {	if (numa_debug) \
+		printk(KERN_INFO f, ## a); \
+} while (0)
 
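Two details in the hunk above are easy to misread: early_param("numa", ...) registers early_numa() to run during early command-line parsing, and the options are matched with strstr(), so any option string containing the substrings is accepted ("numa=debug,off" sets both flags). A minimal userspace sketch of that matching behavior, with the kernel plumbing stubbed out:

	#include <stdio.h>
	#include <string.h>

	static int numa_enabled = 1;
	static int numa_debug;

	/* Same strstr()-based matching as early_numa() above: substrings
	 * match anywhere in the option string, in any order.
	 */
	static void parse_numa_opt(const char *p)
	{
		if (strstr(p, "off"))
			numa_enabled = 0;
		if (strstr(p, "debug"))
			numa_debug = 1;
	}

	int main(void)
	{
		parse_numa_opt("debug,off");
		printf("numa_enabled=%d numa_debug=%d\n", numa_enabled, numa_debug);
		return 0;
	}
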
-	avoid_start = avoid_end = 0;
+static void __init find_ramdisk(unsigned long phys_base)
+{
 #ifdef CONFIG_BLK_DEV_INITRD
-	avoid_start = initrd_start;
-	avoid_end = PAGE_ALIGN(initrd_end);
+	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
+		unsigned long ramdisk_image;
+
+		/* Older versions of the bootloader only supported a
+		 * 32-bit physical address for the ramdisk image
+		 * location, stored at sparc_ramdisk_image.  Newer
+		 * SILO versions set sparc_ramdisk_image to zero and
+		 * provide a full 64-bit physical address at
+		 * sparc_ramdisk_image64.
+		 */
+		ramdisk_image = sparc_ramdisk_image;
+		if (!ramdisk_image)
+			ramdisk_image = sparc_ramdisk_image64;
+
+		/* Another bootloader quirk.  The bootloader normalizes
+		 * the physical address to KERNBASE, so we have to
+		 * factor that back out and add in the lowest valid
+		 * physical page address to get the true physical address.
+		 */
+		ramdisk_image -= KERNBASE;
+		ramdisk_image += phys_base;
+
+		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
+			ramdisk_image, sparc_ramdisk_size);
+
+		initrd_start = ramdisk_image;
+		initrd_end = ramdisk_image + sparc_ramdisk_size;
+
+		lmb_reserve(initrd_start, initrd_end);
+	}
 #endif
+}
 
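The address fix-up in find_ramdisk() above is the subtle part: the bootloader reports the ramdisk location normalized to KERNBASE, so the code subtracts KERNBASE and adds phys_base (the lowest valid physical page address) back in before reserving the range in LMB. A worked example, assuming sparc64's KERNBASE of 0x400000 and hypothetical bootloader and phys_base values:

	#include <stdio.h>

	#define KERNBASE 0x400000UL	/* sparc64 kernel base (per asm/page.h) */

	int main(void)
	{
		/* Hypothetical: KERNBASE-normalized address from the
		 * bootloader, on a machine whose RAM starts at phys_base.
		 */
		unsigned long sparc_ramdisk_image = 0x800000UL;
		unsigned long phys_base = 0x20000000UL;
		unsigned long pa;

		pa = sparc_ramdisk_image - KERNBASE + phys_base;
		printf("true ramdisk physical address: 0x%lx\n", pa);	/* 0x20400000 */
		return 0;
	}
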
-	for (i = 0; i < pavail_ents; i++) {
-		unsigned long start, end;
+struct node_mem_mask {
+	unsigned long mask;
+	unsigned long val;
+	unsigned long bootmem_paddr;
+};
+static struct node_mem_mask node_masks[MAX_NUMNODES];
+static int num_node_masks;
 
-		start = pavail[i].phys_addr;
-		end = start + pavail[i].reg_size;
+int numa_cpu_lookup_table[NR_CPUS];
+cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
-		while (start < end) {
-			if (start >= kern_base &&
-			    start < PAGE_ALIGN(kern_base + kern_size)) {
-				start = PAGE_ALIGN(kern_base + kern_size);
-				continue;
-			}
-			if (start >= avoid_start && start < avoid_end) {
-				start = avoid_end;
-				continue;
-			}
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
-			if ((end - start) < bootmap_size)
-				break;
+struct mdesc_mblock {
+	u64	base;
+	u64	size;
+	u64	offset; /* RA-to-PA */
+};
+static struct mdesc_mblock *mblocks;
+static int num_mblocks;
 
-			if (start < kern_base &&
-			    (start + bootmap_size) > kern_base) {
-				start = PAGE_ALIGN(kern_base + kern_size);
-				continue;
-			}
+static unsigned long ra_to_pa(unsigned long addr)
+{
+	int i;
 
-			if (start < avoid_start &&
-			    (start + bootmap_size) > avoid_start) {
-				start = avoid_end;
-				continue;
-			}
+	for (i = 0; i < num_mblocks; i++) {
+		struct mdesc_mblock *m = &mblocks[i];
 
-			/* OK, it doesn't overlap anything, use it.  */
-			return start >> PAGE_SHIFT;
+		if (addr >= m->base &&
+		    addr < (m->base + m->size)) {
+			addr += m->offset;
+			break;
 		}
 	}
-
-	prom_printf("Cannot find free area for bootmap, aborting.\n");
-	prom_halt();
+	return addr;
 }
 
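ra_to_pa() above implements the sun4v real-address to physical-address translation: each machine-description "mblock" covers one RA range and carries an additive RA-to-PA offset, and an address outside every mblock passes through unchanged. A self-contained sketch with two hypothetical mblocks:

	#include <stdio.h>

	struct mdesc_mblock {
		unsigned long base;	/* real address base */
		unsigned long size;
		unsigned long offset;	/* RA-to-PA */
	};

	/* Hypothetical two-mblock layout, purely for illustration. */
	static struct mdesc_mblock mblocks[] = {
		{ 0x00000000UL, 0x40000000UL, 0x000000000UL },
		{ 0x40000000UL, 0x40000000UL, 0x100000000UL },
	};

	static unsigned long ra_to_pa(unsigned long addr)
	{
		unsigned long i;

		for (i = 0; i < sizeof(mblocks) / sizeof(mblocks[0]); i++) {
			struct mdesc_mblock *m = &mblocks[i];

			if (addr >= m->base && addr < (m->base + m->size)) {
				addr += m->offset;
				break;
			}
		}
		return addr;	/* unchanged if no mblock covers it */
	}

	int main(void)
	{
		printf("RA 0x50000000 -> PA 0x%lx\n", ra_to_pa(0x50000000UL));
		return 0;
	}
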
-static void __init trim_pavail(unsigned long *cur_size_p,
-			       unsigned long *end_of_phys_p)
+static int find_node(unsigned long addr)
 {
-	unsigned long to_trim = *cur_size_p - cmdline_memory_size;
-	unsigned long avoid_start, avoid_end;
 	int i;
 
-	to_trim = PAGE_ALIGN(to_trim);
+	addr = ra_to_pa(addr);
+	for (i = 0; i < num_node_masks; i++) {
+		struct node_mem_mask *p = &node_masks[i];
 
-	avoid_start = avoid_end = 0;
-#ifdef CONFIG_BLK_DEV_INITRD
-	avoid_start = initrd_start;
-	avoid_end = PAGE_ALIGN(initrd_end);
+		if ((addr & p->mask) == p->val)
+			return i;
+	}
+	return -1;
+}
+
+static unsigned long nid_range(unsigned long start, unsigned long end,
+			       int *nid)
+{
+	*nid = find_node(start);
+	start += PAGE_SIZE;
+	while (start < end) {
+		int n = find_node(start);
+
+		if (n != *nid)
+			break;
+		start += PAGE_SIZE;
+	}
+
+	return start;
+}
+#else
+static unsigned long nid_range(unsigned long start, unsigned long end,
+			       int *nid)
+{
+	*nid = 0;
+	return end;
+}
 #endif
 
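Node membership here is a pure mask/match test: an address belongs to node i when (ra_to_pa(addr) & node_masks[i].mask) == node_masks[i].val, and nid_range() walks a range page by page, stopping at the first page owned by a different node. A runnable model of both, assuming hypothetical masks in which PA bit 32 selects the node, sparc64's 8K pages, and an LP64 target:

	#include <stdio.h>

	#define PAGE_SIZE 8192UL	/* sparc64 uses 8K pages */

	struct node_mem_mask { unsigned long mask, val; };

	/* Hypothetical: bit 32 of the physical address picks the node. */
	static struct node_mem_mask node_masks[] = {
		{ 1UL << 32, 0UL },
		{ 1UL << 32, 1UL << 32 },
	};

	static int find_node(unsigned long addr)
	{
		int i;

		for (i = 0; i < 2; i++)
			if ((addr & node_masks[i].mask) == node_masks[i].val)
				return i;
		return -1;
	}

	/* Advance page by page until the owning node changes. */
	static unsigned long nid_range(unsigned long start, unsigned long end,
				       int *nid)
	{
		*nid = find_node(start);
		start += PAGE_SIZE;
		while (start < end && find_node(start) == *nid)
			start += PAGE_SIZE;
		return start;
	}

	int main(void)
	{
		unsigned long start = (1UL << 32) - 2 * PAGE_SIZE;
		unsigned long end   = (1UL << 32) + 2 * PAGE_SIZE;
		int nid;
		unsigned long split = nid_range(start, end, &nid);

		/* The range straddles 4GiB, so it splits at the boundary. */
		printf("nid %d covers [0x%lx, 0x%lx)\n", nid, start, split);
		return 0;
	}
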
-	/* Trim some pavail[] entries in order to satisfy the
-	 * requested "mem=xxx" kernel command line specification.
-	 *
-	 * We must not trim off the kernel image area nor the
-	 * initial ramdisk range (if any).  Also, we must not trim
-	 * any pavail[] entry down to zero in order to preserve
-	 * the invariant that all pavail[] entries have a non-zero
-	 * size which is assumed by all of the code in here.
-	 */
-	for (i = 0; i < pavail_ents; i++) {
-		unsigned long start, end, kern_end;
-		unsigned long trim_low, trim_high, n;
+/* This must be invoked after performing all of the necessary
+ * add_active_range() calls for 'nid'.  We need to be able to get
+ * correct data from get_pfn_range_for_nid().
+ */
+static void __init allocate_node_data(int nid)
+{
+	unsigned long paddr, num_pages, start_pfn, end_pfn;
+	struct pglist_data *p;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	paddr = lmb_alloc_nid(sizeof(struct pglist_data),
+			      SMP_CACHE_BYTES, nid, nid_range);
+	if (!paddr) {
+		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
+		prom_halt();
+	}
+	NODE_DATA(nid) = __va(paddr);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-		kern_end = PAGE_ALIGN(kern_base + kern_size);
+	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+#endif
 
-		trim_low = start = pavail[i].phys_addr;
-		trim_high = end = start + pavail[i].reg_size;
+	p = NODE_DATA(nid);
 
-		if (kern_base >= start &&
-		    kern_base < end) {
-			trim_low = kern_base;
-			if (kern_end >= end)
-				continue;
-		}
-		if (kern_end >= start &&
-		    kern_end < end) {
-			trim_high = kern_end;
-		}
-		if (avoid_start &&
-		    avoid_start >= start &&
-		    avoid_start < end) {
-			if (trim_low > avoid_start)
-				trim_low = avoid_start;
-			if (avoid_end >= end)
-				continue;
-		}
-		if (avoid_end &&
-		    avoid_end >= start &&
-		    avoid_end < end) {
-			if (trim_high < avoid_end)
-				trim_high = avoid_end;
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+	p->node_start_pfn = start_pfn;
+	p->node_spanned_pages = end_pfn - start_pfn;
+
+	if (p->node_spanned_pages) {
+		num_pages = bootmem_bootmap_pages(p->node_spanned_pages);
+
+		paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
+				      nid_range);
+		if (!paddr) {
+			prom_printf("Cannot allocate bootmap for nid[%d]\n",
+				    nid);
+			prom_halt();
 		}
+		node_masks[nid].bootmem_paddr = paddr;
+	}
+}
+
+static void init_node_masks_nonnuma(void)
+{
+	int i;
+
+	numadbg("Initializing tables for non-numa.\n");
+
+	node_masks[0].mask = node_masks[0].val = 0;
+	num_node_masks = 1;
+
+	for (i = 0; i < NR_CPUS; i++)
+		numa_cpu_lookup_table[i] = 0;
+
+	numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+}
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data *node_data[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_data);
+
+struct mdesc_mlgroup {
+	u64	node;
+	u64	latency;
+	u64	match;
+	u64	mask;
+};
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
+static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
+				   u32 cfg_handle)
+{
+	u64 arc;
 
-	if (trim_high <= trim_low)
+	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const u64 *val;
+
+		val = mdesc_get_property(md, target,
+					 "cfg-handle", NULL);
+		if (val && *val == cfg_handle)
+			return 0;
+	}
+	return -ENODEV;
+}
+
+static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
+				    u32 cfg_handle)
+{
+	u64 arc, candidate, best_latency = ~(u64)0;
+
+	candidate = MDESC_NODE_NULL;
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *val;
+
+		if (strcmp(name, "pio-latency-group"))
 			continue;
 
-		if (trim_low == start && trim_high == end) {
-			/* Whole chunk is available for trimming.
-			 * Trim all except one page, in order to keep
-			 * entry non-empty.
-			 */
-			n = (end - start) - PAGE_SIZE;
-			if (n > to_trim)
-				n = to_trim;
-
-			if (n) {
-				pavail[i].phys_addr += n;
-				pavail[i].reg_size -= n;
-				to_trim -= n;
-			}
-		} else {
-			n = (trim_low - start);
-			if (n > to_trim)
-				n = to_trim;
-
-			if (n) {
-				pavail[i].phys_addr += n;
-				pavail[i].reg_size -= n;
-				to_trim -= n;
-			}
-			if (to_trim) {
-				n = end - trim_high;
-				if (n > to_trim)
-					n = to_trim;
-				if (n) {
-					pavail[i].reg_size -= n;
-					to_trim -= n;
-				}
-			}
-		}
+		val = mdesc_get_property(md, target, "latency", NULL);
+		if (!val)
+			continue;
+
+		if (*val < best_latency) {
+			candidate = target;
+			best_latency = *val;
 		}
+	}
+
+	if (candidate == MDESC_NODE_NULL)
+		return -ENODEV;
+
+	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
+}
+
+int of_node_to_nid(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+	struct mdesc_handle *md;
+	u32 cfg_handle;
+	int count, nid;
+	u64 grp;
 
-	if (!to_trim)
+	if (!mlgroups)
+		return -1;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (!regs)
+		return -1;
+
+	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
+
+	md = mdesc_grab();
+
+	count = 0;
+	nid = -1;
+	mdesc_for_each_node_by_name(md, grp, "group") {
+		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
+			nid = count;
 			break;
+		}
+		count++;
 	}
 
-	/* Recalculate. */
-	*cur_size_p = 0UL;
-	for (i = 0; i < pavail_ents; i++) {
-		*end_of_phys_p = pavail[i].phys_addr +
-			pavail[i].reg_size;
-		*cur_size_p += pavail[i].reg_size;
-	}
+	mdesc_release(md);
+
+	return nid;
 }
 
-/* About pages_avail, this is the value we will use to calculate
- * the zholes_size[] argument given to free_area_init_node().  The
- * page allocator uses this to calculate nr_kernel_pages,
- * nr_all_pages and zone->present_pages.  On NUMA it is used
- * to calculate zone->min_unmapped_pages and zone->min_slab_pages.
- *
- * So this number should really be set to what the page allocator
- * actually ends up with.  This means:
- * 1) It should include bootmem map pages, we'll release those.
- * 2) It should not include the kernel image, except for the
- *    __init sections which we will also release.
- * 3) It should include the initrd image, since we'll release
- *    that too.
- */
-static unsigned long __init bootmem_init(unsigned long *pages_avail,
-					 unsigned long phys_base)
+static void add_node_ranges(void)
 {
-	unsigned long bootmap_size, end_pfn;
-	unsigned long end_of_phys_memory = 0UL;
-	unsigned long bootmap_pfn, bytes_avail, size;
 	int i;
 
-	bytes_avail = 0UL;
-	for (i = 0; i < pavail_ents; i++) {
-		end_of_phys_memory = pavail[i].phys_addr +
-			pavail[i].reg_size;
-		bytes_avail += pavail[i].reg_size;
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
+		unsigned long start, end;
+
+		start = lmb.memory.region[i].base;
+		end = start + size;
+		while (start < end) {
+			unsigned long this_end;
+			int nid;
+
+			this_end = nid_range(start, end, &nid);
+
+			numadbg("Adding active range nid[%d] "
+				"start[%lx] end[%lx]\n",
+				nid, start, this_end);
+
+			add_active_range(nid,
+					 start >> PAGE_SHIFT,
+					 this_end >> PAGE_SHIFT);
+
+			start = this_end;
+		}
 	}
+}
 
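add_node_ranges() above carves every LMB memory region into maximal single-node chunks by repeatedly calling nid_range() and registering each chunk, in page-frame units, with add_active_range(). The same carving loop in a self-contained sketch, where the nid_range() stub alternates nodes every 1GiB purely for illustration:

	#include <stdio.h>

	#define PAGE_SHIFT 13	/* sparc64: 8K pages */

	/* Stub: even/odd 1GiB chunks alternate nodes, for illustration. */
	static unsigned long nid_range(unsigned long start, unsigned long end,
				       int *nid)
	{
		unsigned long next = (start | ((1UL << 30) - 1)) + 1;

		*nid = (start >> 30) & 1;
		return next < end ? next : end;
	}

	static void add_active_range(int nid, unsigned long spfn,
				     unsigned long epfn)
	{
		printf("nid[%d] pfn [%lx:%lx)\n", nid, spfn, epfn);
	}

	int main(void)
	{
		unsigned long start = 0, end = 3UL << 30;	/* a 3GiB region */

		/* The same carving loop as add_node_ranges(). */
		while (start < end) {
			int nid;
			unsigned long this_end = nid_range(start, end, &nid);

			add_active_range(nid, start >> PAGE_SHIFT,
					 this_end >> PAGE_SHIFT);
			start = this_end;
		}
		return 0;
	}
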
-	/* Determine the location of the initial ramdisk before trying
-	 * to honor the "mem=xxx" command line argument.  We must know
-	 * where the kernel image and the ramdisk image are so that we
-	 * do not trim those two areas from the physical memory map.
-	 */
+static int __init grab_mlgroups(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mlgroups = __va(paddr);
+	num_mlgroups = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
+		struct mdesc_mlgroup *m = &mlgroups[count++];
+		const u64 *val;
+
+		m->node = node;
+
+		val = mdesc_get_property(md, node, "latency", NULL);
+		m->latency = *val;
+		val = mdesc_get_property(md, node, "address-match", NULL);
+		m->match = *val;
+		val = mdesc_get_property(md, node, "address-mask", NULL);
+		m->mask = *val;
+
+		numadbg("MLGROUP[%d]: node[%lx] latency[%lx] "
+			"match[%lx] mask[%lx]\n",
+			count - 1, m->node, m->latency, m->match, m->mask);
+	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	/* Now have to check initial ramdisk, so that bootmap does not overwrite it */
-	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
-		unsigned long ramdisk_image = sparc_ramdisk_image ?
-			sparc_ramdisk_image : sparc_ramdisk_image64;
-		ramdisk_image -= KERNBASE;
-		initrd_start = ramdisk_image + phys_base;
-		initrd_end = initrd_start + sparc_ramdisk_size;
-		if (initrd_end > end_of_phys_memory) {
-			printk(KERN_CRIT "initrd extends beyond end of memory "
-			       "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
-			       initrd_end, end_of_phys_memory);
-			initrd_start = 0;
-			initrd_end = 0;
+	return 0;
+}
+
+static int __init grab_mblocks(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "mblock")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = lmb_alloc(count * sizeof(struct mdesc_mblock),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mblocks = __va(paddr);
+	num_mblocks = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "mblock") {
+		struct mdesc_mblock *m = &mblocks[count++];
+		const u64 *val;
+
+		val = mdesc_get_property(md, node, "base", NULL);
+		m->base = *val;
+		val = mdesc_get_property(md, node, "size", NULL);
+		m->size = *val;
+		val = mdesc_get_property(md, node,
+					 "address-congruence-offset", NULL);
+		m->offset = *val;
+
+		numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n",
+			count - 1, m->base, m->size, m->offset);
+	}
+
+	return 0;
+}
+
+static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
+					       u64 grp, cpumask_t *mask)
+{
+	u64 arc;
+
+	cpus_clear(*mask);
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *id;
+
+		if (strcmp(name, "cpu"))
+			continue;
+		id = mdesc_get_property(md, target, "id", NULL);
+		if (*id < NR_CPUS)
+			cpu_set(*id, *mask);
+	}
+}
+
+static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
+{
+	int i;
+
+	for (i = 0; i < num_mlgroups; i++) {
+		struct mdesc_mlgroup *m = &mlgroups[i];
+		if (m->node == node)
+			return m;
+	}
+	return NULL;
+}
+
+static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
+				      int index)
+{
+	struct mdesc_mlgroup *candidate = NULL;
+	u64 arc, best_latency = ~(u64)0;
+	struct node_mem_mask *n;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+		if (!m)
+			continue;
+		if (m->latency < best_latency) {
+			candidate = m;
+			best_latency = m->latency;
 		}
 	}
-#endif
+	if (!candidate)
+		return -ENOENT;
+
+	if (num_node_masks != index) {
+		printk(KERN_ERR "Inconsistent NUMA state, "
+		       "index[%d] != num_node_masks[%d]\n",
+		       index, num_node_masks);
+		return -EINVAL;
+	}
 
-	if (cmdline_memory_size &&
-	    bytes_avail > cmdline_memory_size)
-		trim_pavail(&bytes_avail,
-			    &end_of_phys_memory);
+	n = &node_masks[num_node_masks++];
 
-	*pages_avail = bytes_avail >> PAGE_SHIFT;
+	n->mask = candidate->mask;
+	n->val = candidate->match;
 
-	end_pfn = end_of_phys_memory >> PAGE_SHIFT;
+	numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n",
+		index, n->mask, n->val, candidate->latency);
 
-	/* Initialize the boot-time allocator. */
-	max_pfn = max_low_pfn = end_pfn;
-	min_low_pfn = (phys_base >> PAGE_SHIFT);
+	return 0;
+}
+
+static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
+					 int index)
+{
+	cpumask_t mask;
+	int cpu;
 
-	bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
+	numa_parse_mdesc_group_cpus(md, grp, &mask);
 
-	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
-					 min_low_pfn, end_pfn);
+	for_each_cpu_mask(cpu, mask)
+		numa_cpu_lookup_table[cpu] = index;
+	numa_cpumask_lookup_table[index] = mask;
 
-	/* Now register the available physical memory with the
-	 * allocator.
-	 */
-	for (i = 0; i < pavail_ents; i++)
-		free_bootmem(pavail[i].phys_addr, pavail[i].reg_size);
+	if (numa_debug) {
+		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
+		for_each_cpu_mask(cpu, mask)
+			printk("%d ", cpu);
+		printk("]\n");
+	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (initrd_start) {
-		size = initrd_end - initrd_start;
+	return numa_attach_mlgroup(md, grp, index);
+}
+
+static int __init numa_parse_mdesc(void)
+{
+	struct mdesc_handle *md = mdesc_grab();
+	int i, err, count;
+	u64 node;
+
+	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+	if (node == MDESC_NODE_NULL) {
+		mdesc_release(md);
+		return -ENOENT;
+	}
+
+	err = grab_mblocks(md);
+	if (err < 0)
+		goto out;
+
+	err = grab_mlgroups(md);
+	if (err < 0)
+		goto out;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		err = numa_parse_mdesc_group(md, node, count);
+		if (err < 0)
+			break;
+		count++;
+	}
+
+	add_node_ranges();
+
+	for (i = 0; i < num_node_masks; i++) {
+		allocate_node_data(i);
+		node_set_online(i);
+	}
+
+	err = 0;
+out:
+	mdesc_release(md);
+	return err;
+}
+
+static int __init numa_parse_sun4u(void)
+{
+	return -1;
+}
 
-		/* Reserve the initrd image area. */
-		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
+static int __init bootmem_init_numa(void)
+{
+	int err = -1;
 
-		initrd_start += PAGE_OFFSET;
-		initrd_end += PAGE_OFFSET;
+	numadbg("bootmem_init_numa()\n");
+
+	if (numa_enabled) {
+		if (tlb_type == hypervisor)
+			err = numa_parse_mdesc();
+		else
+			err = numa_parse_sun4u();
 	}
+	return err;
+}
+
+#else
+
+static int bootmem_init_numa(void)
+{
+	return -1;
+}
+
 #endif
-	/* Reserve the kernel text/data/bss. */
-	reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT);
-	*pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
-
-	/* Add back in the initmem pages. */
-	size = ((unsigned long)(__init_end) & PAGE_MASK) -
-		PAGE_ALIGN((unsigned long)__init_begin);
-	*pages_avail += size >> PAGE_SHIFT;
-
-	/* Reserve the bootmem map.  We do not account for it
-	 * in pages_avail because we will release that memory
-	 * in free_all_bootmem.
-	 */
-	size = bootmap_size;
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
 
-	for (i = 0; i < pavail_ents; i++) {
+static void __init bootmem_init_nonnuma(void)
+{
+	unsigned long top_of_ram = lmb_end_of_DRAM();
+	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned int i;
+
+	numadbg("bootmem_init_nonnuma()\n");
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+
+	init_node_masks_nonnuma();
+
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
 		unsigned long start_pfn, end_pfn;
 
-		start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
-		end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
-		memory_present(0, start_pfn, end_pfn);
+		if (!size)
+			continue;
+
+		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		add_active_range(0, start_pfn, end_pfn);
 	}
+
+	allocate_node_data(0);
+
+	node_set_online(0);
+}
 
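The "Memory hole size" figure printed by bootmem_init_nonnuma() is just the gap between the highest physical address and the amount of RAM actually present: lmb_end_of_DRAM() returns the address one past the last byte of the last region, while lmb_phys_mem_size() sums the region sizes. A worked example with a hypothetical sparse layout:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical: 3GiB of RAM topping out at the 4GiB mark. */
		unsigned long top_of_ram = 0x100000000UL;	/* lmb_end_of_DRAM()   */
		unsigned long total_ram  = 0xC0000000UL;	/* lmb_phys_mem_size() */

		printf("Memory hole size: %ldMB\n",
		       (top_of_ram - total_ram) >> 20);	/* 1024MB */
		return 0;
	}
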
+static void __init reserve_range_in_node(int nid, unsigned long start,
+					 unsigned long end)
+{
+	numadbg("    reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n",
+		nid, start, end);
+	while (start < end) {
+		unsigned long this_end;
+		int n;
+
+		this_end = nid_range(start, end, &n);
+		if (n == nid) {
+			numadbg("      MATCH reserving range [%lx:%lx]\n",
+				start, this_end);
+			reserve_bootmem_node(NODE_DATA(nid), start,
+					     (this_end - start), BOOTMEM_DEFAULT);
+		} else
+			numadbg("      NO MATCH, advancing start to %lx\n",
+				this_end);
+
+		start = this_end;
+	}
+}
+
+static void __init trim_reserved_in_node(int nid)
+{
+	int i;
+
+	numadbg("  trim_reserved_in_node(%d)\n", nid);
+
+	for (i = 0; i < lmb.reserved.cnt; i++) {
+		unsigned long start = lmb.reserved.region[i].base;
+		unsigned long size = lmb_size_bytes(&lmb.reserved, i);
+		unsigned long end = start + size;
+
+		reserve_range_in_node(nid, start, end);
+	}
+}
+
+static void __init bootmem_init_one_node(int nid)
+{
+	struct pglist_data *p;
+
+	numadbg("bootmem_init_one_node(%d)\n", nid);
+
+	p = NODE_DATA(nid);
+
+	if (p->node_spanned_pages) {
+		unsigned long paddr = node_masks[nid].bootmem_paddr;
+		unsigned long end_pfn;
+
+		end_pfn = p->node_start_pfn + p->node_spanned_pages;
+
+		numadbg("  init_bootmem_node(%d, %lx, %lx, %lx)\n",
+			nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+		init_bootmem_node(p, paddr >> PAGE_SHIFT,
+				  p->node_start_pfn, end_pfn);
+
+		numadbg("  free_bootmem_with_active_regions(%d, %lx)\n",
+			nid, end_pfn);
+		free_bootmem_with_active_regions(nid, end_pfn);
+
+		trim_reserved_in_node(nid);
+
+		numadbg("  sparse_memory_present_with_active_regions(%d)\n",
+			nid);
+		sparse_memory_present_with_active_regions(nid);
+	}
+}
+
+static unsigned long __init bootmem_init(unsigned long phys_base)
+{
+	unsigned long end_pfn;
+	int nid;
+
+	end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_pfn = max_low_pfn = end_pfn;
+	min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+	if (bootmem_init_numa() < 0)
+		bootmem_init_nonnuma();
+
+	/* XXX cpu notifier XXX */
+
+	for_each_online_node(nid)
+		bootmem_init_one_node(nid);
 
 	sparse_init();
 
@@ -1289,7 +1688,7 @@ void __init setup_per_cpu_areas(void)
 
 void __init paging_init(void)
 {
-	unsigned long end_pfn, pages_avail, shift, phys_base;
+	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
 
 	/* These build time checkes make sure that the dcache_dirty_cpu()
@@ -1330,12 +1729,26 @@ void __init paging_init(void)
 		sun4v_ktsb_init();
 	}
 
+	lmb_init();
+
 	/* Find available physical memory... */
 	read_obp_memory("available", &pavail[0], &pavail_ents);
 
 	phys_base = 0xffffffffffffffffUL;
-	for (i = 0; i < pavail_ents; i++)
+	for (i = 0; i < pavail_ents; i++) {
 		phys_base = min(phys_base, pavail[i].phys_addr);
+		lmb_add(pavail[i].phys_addr, pavail[i].reg_size);
+	}
+
+	lmb_reserve(kern_base, kern_size);
+
+	find_ramdisk(phys_base);
+
+	if (cmdline_memory_size)
+		lmb_enforce_memory_limit(phys_base + cmdline_memory_size);
+
+	lmb_analyze();
+	lmb_dump_all();
 
 	set_bit(0, mmu_context_bmap);
 
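The ordering in this hunk matters: lmb_init() must precede the lmb_add() calls that register each OBP "available" bank, and the kernel image and ramdisk must be lmb_reserve()d before anything calls lmb_alloc(). Note also that mem= names an amount of memory rather than an address, so the clamp passed to lmb_enforce_memory_limit() adds phys_base back in. A tiny illustration of that arithmetic, with hypothetical values:

	#include <stdio.h>

	int main(void)
	{
		/* mem=512M on a machine whose RAM starts at 0x20000000:
		 * the limit handed to lmb_enforce_memory_limit() is an
		 * absolute address, hence phys_base + size.
		 */
		unsigned long phys_base = 0x20000000UL;
		unsigned long cmdline_memory_size = 512UL << 20;

		printf("clamp at 0x%lx\n", phys_base + cmdline_memory_size);
		return 0;
	}
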
@@ -1371,14 +1784,10 @@ void __init paging_init(void)
 	if (tlb_type == hypervisor)
 		sun4v_ktsb_register();
 
-	/* Setup bootmem... */
-	pages_avail = 0;
-	last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
-
-	max_mapnr = last_valid_pfn;
-
-	kernel_physical_mapping_init();
-
+	/* We must setup the per-cpu areas before we pull in the
+	 * PROM and the MDESC.  The code there fills in cpu and
+	 * other information into per-cpu data structures.
+	 */
 	real_setup_per_cpu_areas();
 
 	prom_build_devicetree();
@@ -1386,20 +1795,22 @@ void __init paging_init(void)
 	if (tlb_type == hypervisor)
 		sun4v_mdesc_init();
 
+	/* Setup bootmem... */
+	last_valid_pfn = end_pfn = bootmem_init(phys_base);
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+	max_mapnr = last_valid_pfn;
+#endif
+	kernel_physical_mapping_init();
+
 	{
-		unsigned long zones_size[MAX_NR_ZONES];
-		unsigned long zholes_size[MAX_NR_ZONES];
-		int znum;
+		unsigned long max_zone_pfns[MAX_NR_ZONES];
 
-		for (znum = 0; znum < MAX_NR_ZONES; znum++)
-			zones_size[znum] = zholes_size[znum] = 0;
+		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 
-		zones_size[ZONE_NORMAL] = end_pfn;
-		zholes_size[ZONE_NORMAL] = end_pfn - pages_avail;
+		max_zone_pfns[ZONE_NORMAL] = end_pfn;
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
-				    zholes_size);
+		free_area_init_nodes(max_zone_pfns);
 	}
 
 	printk("Booting Linux...\n");
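This hunk retires the contig_page_data-based free_area_init_node() call: once active ranges are registered, the generic free_area_init_nodes() derives each node's spanned pages and holes by itself, so the arch only reports the highest PFN per zone and the old zholes_size bookkeeping (and with it pages_avail) disappears. A toy illustration of the hole arithmetic the core code now performs from the active-range data:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical node with two active PFN ranges inside its
		 * span; holes are derived, not passed in via zholes_size[].
		 */
		unsigned long spanned = 0x80000;		/* node span in pages  */
		unsigned long present = 0x30000 + 0x28000;	/* sum of active ranges */

		printf("hole pages: %lx\n", spanned - present);
		return 0;
	}
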
@@ -1408,21 +1819,52 @@ void __init paging_init(void)
 	cpu_probe();
 }
 
-static void __init taint_real_pages(void)
+int __init page_in_phys_avail(unsigned long paddr)
+{
+	int i;
+
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < pavail_ents; i++) {
+		unsigned long start, end;
+
+		start = pavail[i].phys_addr;
+		end = start + pavail[i].reg_size;
+
+		if (paddr >= start && paddr < end)
+			return 1;
+	}
+	if (paddr >= kern_base && paddr < (kern_base + kern_size))
+		return 1;
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (paddr >= __pa(initrd_start) &&
+	    paddr < __pa(PAGE_ALIGN(initrd_end)))
+		return 1;
+#endif
+
+	return 0;
+}
+
+static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
+static int pavail_rescan_ents __initdata;
+
+/* Certain OBP calls, such as fetching "available" properties, can
+ * claim physical memory.  So, along with initializing the valid
+ * address bitmap, what we do here is refetch the physical available
+ * memory list again, and make sure it provides at least as much
+ * memory as 'pavail' does.
+ */
+static void setup_valid_addr_bitmap_from_pavail(void)
 {
 	int i;
 
 	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
 
-	/* Find changes discovered in the physmem available rescan and
-	 * reserve the lost portions in the bootmem maps.
-	 */
 	for (i = 0; i < pavail_ents; i++) {
 		unsigned long old_start, old_end;
 
 		old_start = pavail[i].phys_addr;
-		old_end = old_start +
-			pavail[i].reg_size;
+		old_end = old_start + pavail[i].reg_size;
 		while (old_start < old_end) {
 			int n;
 
@@ -1440,7 +1882,16 @@ static void __init taint_real_pages(void)
 					goto do_next_page;
 				}
 			}
-			reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT);
+
+			prom_printf("mem_init: Lost memory in pavail\n");
+			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
+				    pavail[i].phys_addr,
+				    pavail[i].reg_size);
+			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
+				    pavail_rescan[i].phys_addr,
+				    pavail_rescan[i].reg_size);
+			prom_printf("mem_init: Cannot continue, aborting.\n");
+			prom_halt();
 
 		do_next_page:
 			old_start += PAGE_SIZE;
@@ -1448,32 +1899,6 @@ static void __init taint_real_pages(void)
 	}
 }
 
-int __init page_in_phys_avail(unsigned long paddr)
-{
-	int i;
-
-	paddr &= PAGE_MASK;
-
-	for (i = 0; i < pavail_rescan_ents; i++) {
-		unsigned long start, end;
-
-		start = pavail_rescan[i].phys_addr;
-		end = start + pavail_rescan[i].reg_size;
-
-		if (paddr >= start && paddr < end)
-			return 1;
-	}
-	if (paddr >= kern_base && paddr < (kern_base + kern_size))
-		return 1;
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (paddr >= __pa(initrd_start) &&
-	    paddr < __pa(PAGE_ALIGN(initrd_end)))
-		return 1;
-#endif
-
-	return 0;
-}
-
 void __init mem_init(void)
 {
 	unsigned long codepages, datapages, initpages;
@@ -1496,14 +1921,26 @@ void __init mem_init(void)
 		addr += PAGE_SIZE;
 	}
 
-	taint_real_pages();
+	setup_valid_addr_bitmap_from_pavail();
 
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	for_each_online_node(i) {
+		if (NODE_DATA(i)->node_spanned_pages != 0) {
+			totalram_pages +=
+				free_all_bootmem_node(NODE_DATA(i));
+		}
+	}
+#else
+	totalram_pages = free_all_bootmem();
+#endif
+
 	/* We subtract one to account for the mem_map_zero page
 	 * allocated below.
 	 */
-	totalram_pages = num_physpages = free_all_bootmem() - 1;
+	totalram_pages -= 1;
+	num_physpages = totalram_pages;
 
 	/*
 	 * Set up the zero page, mark it reserved, so that page count