 include/linux/mm.h     |  47
 include/linux/mmzone.h |  10
 mm/page_alloc.c        | 552
 3 files changed, 584 insertions(+), 25 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 856f0ee7e84a..c0402da7cce0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -937,6 +937,53 @@ extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat,
 	unsigned long * zones_size, unsigned long zone_start_pfn,
 	unsigned long *zholes_size);
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
+ * zones, allocate the backing mem_map and account for memory holes in a more
+ * architecture-independent manner. This is a substitute for creating the
+ * zone_sizes[] and zholes_size[] arrays and passing them to
+ * free_area_init_node().
+ *
+ * An architecture is expected to register ranges of page frames backed by
+ * physical memory with add_active_range() before calling
+ * free_area_init_nodes(), passing in the PFN each zone ends at. In its
+ * most basic usage, an architecture is expected to do something like
+ *
+ * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
+ *						max_highmem_pfn};
+ * for_each_valid_physical_page_range()
+ *	add_active_range(node_id, start_pfn, end_pfn)
+ * free_area_init_nodes(max_zone_pfns);
+ *
+ * If the architecture guarantees that there are no holes in the ranges
+ * registered with add_active_range(), free_bootmem_with_active_regions()
+ * will call free_bootmem_node() for each registered physical page range.
+ * Similarly, sparse_memory_present_with_active_regions() calls
+ * memory_present() for each range when SPARSEMEM is enabled.
+ *
+ * See mm/page_alloc.c for more information on each function exposed by
+ * CONFIG_ARCH_POPULATES_NODE_MAP.
+ */
+extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+					unsigned long end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+					unsigned long new_end_pfn);
+extern void remove_all_active_ranges(void);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+					unsigned long end_pfn);
+extern void get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern void free_bootmem_with_active_regions(int nid,
+					unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+extern int early_pfn_to_nid(unsigned long pfn);
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
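
To make the calling convention described above concrete, here is a minimal sketch of how an architecture's early setup code might drive this interface. The function setup_arch_memory() and all PFN values are hypothetical; only add_active_range(), find_max_pfn_with_active_regions() and free_area_init_nodes() come from this patch.

	/*
	 * Hypothetical arch setup: register the RAM ranges, then let the
	 * core code size the zones. PFN values are made up for illustration.
	 */
	static void __init setup_arch_memory(void)
	{
		unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

		/* RAM at PFNs [0, 0x1000) and [0x2000, 0x8000); hole between */
		add_active_range(0, 0, 0x1000);
		add_active_range(0, 0x2000, 0x8000);

		/* ZONE_DMA covers the first 16MB, the rest is ZONE_NORMAL */
		max_zone_pfns[ZONE_DMA] = 0x1000;
		max_zone_pfns[ZONE_NORMAL] = find_max_pfn_with_active_regions();

		/* Core code computes spanned/present pages and holes per zone */
		free_area_init_nodes(max_zone_pfns);
	}
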
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3693f1a52788..7fa1cbe9fa7a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -305,6 +305,13 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
 #define early_pfn_to_nid(nid) (0UL)
 #endif
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9810f0a60db7..26c9939857fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -37,6 +37,8 @@
 #include <linux/vmalloc.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
+#include <linux/sort.h>
+#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -103,6 +105,33 @@ int min_free_kbytes = 1024;
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * MAX_ACTIVE_REGIONS determines the maximum number of distinct
+ * ranges of memory (RAM) that may be registered with add_active_range().
+ * Ranges passed to add_active_range() will be merged if possible,
+ * so the number of times add_active_range() can be called is
+ * related to the number of nodes and the number of holes.
+ */
+#ifdef CONFIG_MAX_ACTIVE_REGIONS
+/* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
+#define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
+#else
+#if MAX_NUMNODES >= 32
+/* If there can be many nodes, allow up to 50 holes per node */
+#define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
+#else
+/* By default, allow up to 256 distinct regions */
+#define MAX_ACTIVE_REGIONS 256
+#endif
+#endif
+
+struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
+int __initdata nr_nodemap_entries;
+unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
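
As a rough sense of scale for these defaults (my arithmetic, not part of the patch): with MAX_NUMNODES = 64 the table holds 64 * 50 = 3200 entries, and at about 24 bytes per node_active_region on a 64-bit build that is roughly 75KB. Because early_node_map[] is __initdata, the space is reclaimed once boot completes.
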
@@ -1642,25 +1671,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
 
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
-static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long *zholes_size)
-{
-	unsigned long realtotalpages, totalpages = 0;
-	enum zone_type i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		totalpages += zones_size[i];
-	pgdat->node_spanned_pages = totalpages;
-
-	realtotalpages = totalpages;
-	if (zholes_size)
-		for (i = 0; i < MAX_NR_ZONES; i++)
-			realtotalpages -= zholes_size[i];
-	pgdat->node_present_pages = realtotalpages;
-	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
-}
-
-
 /*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
@@ -1977,6 +1987,272 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * Basic iterator support. Return the first range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns first region regardless of node
+ */
+static int __init first_active_region_index_in_nid(int nid)
+{
+	int i;
+
+	for (i = 0; i < nr_nodemap_entries; i++)
+		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Basic iterator support. Return the next active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __init next_active_region_index_in_nid(int index, int nid)
+{
+	for (index = index + 1; index < nr_nodemap_entries; index++)
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+
+	return -1;
+}
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+/*
+ * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
+ * Architectures may implement their own version but if add_active_range()
+ * was used and there are no special requirements, this is a convenient
+ * alternative
+ */
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+	int i;
+
+	for (i = 0; i < nr_nodemap_entries; i++) {
+		unsigned long start_pfn = early_node_map[i].start_pfn;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+
+		if (start_pfn <= pfn && pfn < end_pfn)
+			return early_node_map[i].nid;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+/* Basic iterator support to walk early_node_map[] */
+#define for_each_active_range_index_in_nid(i, nid) \
+	for (i = first_active_region_index_in_nid(nid); i != -1; \
+		i = next_active_region_index_in_nid(i, nid))
+
+/**
+ * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
+ * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed
+ * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
+ *
+ * If an architecture guarantees that all ranges registered with
+ * add_active_range() contain no holes and may be freed, this
+ * function may be used instead of calling free_bootmem() manually.
+ */
+void __init free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn)
+{
+	int i;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		unsigned long size_pages = 0;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+
+		if (early_node_map[i].start_pfn >= max_low_pfn)
+			continue;
+
+		if (end_pfn > max_low_pfn)
+			end_pfn = max_low_pfn;
+
+		size_pages = end_pfn - early_node_map[i].start_pfn;
+		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
+				PFN_PHYS(early_node_map[i].start_pfn),
+				size_pages << PAGE_SHIFT);
+	}
+}
+
+/**
+ * sparse_memory_present_with_active_regions - Call memory_present for each active range
+ * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used
+ *
+ * If an architecture guarantees that all ranges registered with
+ * add_active_range() contain no holes and may be freed, this
+ * function may be used instead of calling memory_present() manually.
+ */
+void __init sparse_memory_present_with_active_regions(int nid)
+{
+	int i;
+
+	for_each_active_range_index_in_nid(i, nid)
+		memory_present(early_node_map[i].nid,
+				early_node_map[i].start_pfn,
+				early_node_map[i].end_pfn);
+}
+
+/**
+ * get_pfn_range_for_nid - Return the start and end page frames for a node
+ * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned
+ * @start_pfn: Passed by reference. On return, it will have the node start_pfn
+ * @end_pfn: Passed by reference. On return, it will have the node end_pfn
+ *
+ * It returns the start and end page frames of a node based on information
+ * provided by an arch calling add_active_range(). If called for a node
+ * with no available memory, a warning is printed and the start and end
+ * PFNs will be 0.
+ */
+void __init get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	int i;
+	*start_pfn = -1UL;
+	*end_pfn = 0;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		*start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
+		*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+	}
+
+	if (*start_pfn == -1UL) {
+		printk(KERN_WARNING "Node %u active with no memory\n", nid);
+		*start_pfn = 0;
+	}
+}
+
+/*
+ * Return the number of pages a zone spans in a node, including holes
+ * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
+ */
+unsigned long __init zone_spanned_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	unsigned long node_start_pfn, node_end_pfn;
+	unsigned long zone_start_pfn, zone_end_pfn;
+
+	/* Get the start and end of the node and zone */
+	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+
+	/* Check that this node has pages within the zone's required range */
+	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+		return 0;
+
+	/* Move the zone boundaries inside the node if necessary */
+	zone_end_pfn = min(zone_end_pfn, node_end_pfn);
+	zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+
+	/* Return the spanned pages */
+	return zone_end_pfn - zone_start_pfn;
+}
+
+/*
+ * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
+ * then all holes in the requested range will be accounted for
+ */
+unsigned long __init __absent_pages_in_range(int nid,
+					unsigned long range_start_pfn,
+					unsigned long range_end_pfn)
+{
+	int i = 0;
+	unsigned long prev_end_pfn = 0, hole_pages = 0;
+	unsigned long start_pfn;
+
+	/* Find the start_pfn of the first active range of pfns in the node */
+	i = first_active_region_index_in_nid(nid);
+	if (i == -1)
+		return 0;
+
+	prev_end_pfn = early_node_map[i].start_pfn;
+
+	/* Find all holes for the zone within the node */
+	for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
+
+		/* No need to continue if prev_end_pfn is outside the zone */
+		if (prev_end_pfn >= range_end_pfn)
+			break;
+
+		/* Make sure the end of the zone is not within the hole */
+		start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+		prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+
+		/* Update the hole size count and move on */
+		if (start_pfn > range_start_pfn) {
+			BUG_ON(prev_end_pfn > start_pfn);
+			hole_pages += start_pfn - prev_end_pfn;
+		}
+		prev_end_pfn = early_node_map[i].end_pfn;
+	}
+
+	return hole_pages;
+}
+
+/**
+ * absent_pages_in_range - Return number of page frames in holes within a range
+ * @start_pfn: The start PFN to start searching for holes
+ * @end_pfn: The end PFN to stop searching for holes
+ *
+ * It returns the number of page frames in memory holes within a range
+ */
+unsigned long __init absent_pages_in_range(unsigned long start_pfn,
+						unsigned long end_pfn)
+{
+	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
+}
+
+/* Return the number of page frames in holes in a zone on a node */
+unsigned long __init zone_absent_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	return __absent_pages_in_range(nid,
+				arch_zone_lowest_possible_pfn[zone_type],
+				arch_zone_highest_possible_pfn[zone_type]);
+}
+#else
+static inline unsigned long zone_spanned_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *zones_size)
+{
+	return zones_size[zone_type];
+}
+
+static inline unsigned long zone_absent_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *zholes_size)
+{
+	if (!zholes_size)
+		return 0;
+
+	return zholes_size[zone_type];
+}
+#endif
+
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
+		unsigned long *zones_size, unsigned long *zholes_size)
+{
+	unsigned long realtotalpages, totalpages = 0;
+	enum zone_type i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
+								zones_size);
+	pgdat->node_spanned_pages = totalpages;
+
+	realtotalpages = totalpages;
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		realtotalpages -=
+			zone_absent_pages_in_node(pgdat->node_id, i,
+								zholes_size);
+	pgdat->node_present_pages = realtotalpages;
+	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
+							realtotalpages);
+}
+
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
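
A worked example of the accounting above, with made-up numbers: say node 0 registered active ranges [0, 0x1000) and [0x2000, 0x8000), and a zone covers PFNs [0, 0x8000). zone_spanned_pages_in_node() clamps the zone to the node and reports 0x8000 spanned pages; __absent_pages_in_range() walks the two ranges, finds the single hole [0x1000, 0x2000), and reports 0x1000 absent pages; calculate_node_totalpages() then records node_spanned_pages = 0x8000 and node_present_pages = 0x7000.
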
@@ -2000,9 +2276,9 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize;
 
-		realsize = size = zones_size[j];
-		if (zholes_size)
-			realsize -= zholes_size[j];
+		size = zone_spanned_pages_in_node(nid, j, zones_size);
+		realsize = size - zone_absent_pages_in_node(nid, j,
+								zholes_size);
 
 		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
@@ -2073,8 +2349,13 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 	/*
 	 * With no DISCONTIG, the global mem_map is just set as node 0's
 	 */
-	if (pgdat == NODE_DATA(0))
+	if (pgdat == NODE_DATA(0)) {
 		mem_map = NODE_DATA(0)->node_mem_map;
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
+			mem_map -= pgdat->node_start_pfn;
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+	}
 #endif
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
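
The bias applied above is easiest to read next to FLATMEM's pfn_to_page(), which indexes the global mem_map directly by PFN. The definition below is a from-memory sketch of the generic FLATMEM lookup of this era (with ARCH_PFN_OFFSET taken as 0), not part of this patch:

	/* FLATMEM: page lookup is plain array indexing by PFN */
	#define __pfn_to_page(pfn)	(mem_map + (pfn))

	/*
	 * If node 0's first page is at a nonzero PFN, node_mem_map[0]
	 * describes PFN node_start_pfn rather than PFN 0, so mem_map is
	 * biased downwards by node_start_pfn to keep (mem_map + pfn)
	 * pointing at the right struct page.
	 */
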
@@ -2085,13 +2366,236 @@ void __meminit free_area_init_node(int nid, struct pglist_data *pgdat,
 {
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
-	calculate_zone_totalpages(pgdat, zones_size, zholes_size);
+	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/**
+ * add_active_range - Register a range of PFNs backed by physical memory
+ * @nid: The node ID the range resides on
+ * @start_pfn: The start PFN of the available physical memory
+ * @end_pfn: The end PFN of the available physical memory
+ *
+ * These ranges are stored in an early_node_map[] and later used by
+ * free_area_init_nodes() to calculate zone sizes and holes. If the
+ * range spans a memory hole, it is up to the architecture to ensure
+ * the memory is not freed by the bootmem allocator. If possible
+ * the range being registered will be merged with existing ranges.
+ */
+void __init add_active_range(unsigned int nid, unsigned long start_pfn,
+						unsigned long end_pfn)
+{
+	int i;
+
+	printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
+			"%d entries of %d used\n",
+			nid, start_pfn, end_pfn,
+			nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+
+	/* Merge with existing active regions if possible */
+	for (i = 0; i < nr_nodemap_entries; i++) {
+		if (early_node_map[i].nid != nid)
+			continue;
+
+		/* Skip if an existing region covers this new one */
+		if (start_pfn >= early_node_map[i].start_pfn &&
+				end_pfn <= early_node_map[i].end_pfn)
+			return;
+
+		/* Merge forward if suitable */
+		if (start_pfn <= early_node_map[i].end_pfn &&
+				end_pfn > early_node_map[i].end_pfn) {
+			early_node_map[i].end_pfn = end_pfn;
+			return;
+		}
+
+		/* Merge backward if suitable */
+		if (start_pfn < early_node_map[i].end_pfn &&
+				end_pfn >= early_node_map[i].start_pfn) {
+			early_node_map[i].start_pfn = start_pfn;
+			return;
+		}
+	}
+
+	/* Check that early_node_map is large enough */
+	if (i >= MAX_ACTIVE_REGIONS) {
+		printk(KERN_CRIT "More than %d memory regions, truncating\n",
+						MAX_ACTIVE_REGIONS);
+		return;
+	}
+
+	early_node_map[i].nid = nid;
+	early_node_map[i].start_pfn = start_pfn;
+	early_node_map[i].end_pfn = end_pfn;
+	nr_nodemap_entries = i + 1;
+}
+
+/**
+ * shrink_active_range - Shrink an existing registered range of PFNs
+ * @nid: The node ID the range to be shrunk resides on
+ * @old_end_pfn: The old end PFN of the range
+ * @new_end_pfn: The new end PFN of the range
+ *
+ * i386 with NUMA uses alloc_remap() to store a node_mem_map on a local node.
+ * The map is kept at the end of the physical page range that has already
+ * been registered with add_active_range(). This function allows an arch
+ * to shrink an existing registered range.
+ */
+void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+						unsigned long new_end_pfn)
+{
+	int i;
+
+	/* Find the old active region end and shrink */
+	for_each_active_range_index_in_nid(i, nid)
+		if (early_node_map[i].end_pfn == old_end_pfn) {
+			early_node_map[i].end_pfn = new_end_pfn;
+			break;
+		}
+}
+
+/**
+ * remove_all_active_ranges - Remove all currently registered regions
+ *
+ * During discovery, it may be found that a table like SRAT is invalid
+ * and an alternative discovery method must be used. This function removes
+ * all currently registered regions.
+ */
+void __init remove_all_active_ranges(void)
+{
+	memset(early_node_map, 0, sizeof(early_node_map));
+	nr_nodemap_entries = 0;
+}
+
+/* Compare two active node_active_regions */
+static int __init cmp_node_active_region(const void *a, const void *b)
+{
+	struct node_active_region *arange = (struct node_active_region *)a;
+	struct node_active_region *brange = (struct node_active_region *)b;
+
+	/* Done this way to avoid overflows */
+	if (arange->start_pfn > brange->start_pfn)
+		return 1;
+	if (arange->start_pfn < brange->start_pfn)
+		return -1;
+
+	return 0;
+}
+
+/* sort the node_map by start_pfn */
+static void __init sort_node_map(void)
+{
+	sort(early_node_map, (size_t)nr_nodemap_entries,
+			sizeof(struct node_active_region),
+			cmp_node_active_region, NULL);
+}
+
+/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+unsigned long __init find_min_pfn_for_node(unsigned long nid)
+{
+	int i;
+
+	/* Assuming a sorted map, the first range found has the starting pfn */
+	for_each_active_range_index_in_nid(i, nid)
+		return early_node_map[i].start_pfn;
+
+	printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
+	return 0;
+}
+
+/**
+ * find_min_pfn_with_active_regions - Find the minimum PFN registered
+ *
+ * It returns the minimum PFN based on information provided via
+ * add_active_range()
+ */
+unsigned long __init find_min_pfn_with_active_regions(void)
+{
+	return find_min_pfn_for_node(MAX_NUMNODES);
+}
+
+/**
+ * find_max_pfn_with_active_regions - Find the maximum PFN registered
+ *
+ * It returns the maximum PFN based on information provided via
+ * add_active_range()
+ */
+unsigned long __init find_max_pfn_with_active_regions(void)
+{
+	int i;
+	unsigned long max_pfn = 0;
+
+	for (i = 0; i < nr_nodemap_entries; i++)
+		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
+
+	return max_pfn;
+}
+
+/**
+ * free_area_init_nodes - Initialise all pg_data_t and zone data
+ * @max_zone_pfn: An array of max PFNs, one for each zone
+ *
+ * This will call free_area_init_node() for each active node in the system.
+ * Using the page ranges provided by add_active_range(), the size of each
+ * zone in each node and their holes is calculated. If the maximum PFNs
+ * of two adjacent zones match, the higher zone is assumed to be empty.
+ * For example, if max_zone_pfn[ZONE_DMA] == max_zone_pfn[ZONE_DMA32],
+ * it is assumed that ZONE_DMA32 has no pages. It is also assumed that a
+ * zone starts where the previous one ended. For example, ZONE_DMA32
+ * starts at max_zone_pfn[ZONE_DMA].
+ */
+void __init free_area_init_nodes(unsigned long *max_zone_pfn)
+{
+	unsigned long nid;
+	enum zone_type i;
+
+	/* Record where the zone boundaries are */
+	memset(arch_zone_lowest_possible_pfn, 0,
+			sizeof(arch_zone_lowest_possible_pfn));
+	memset(arch_zone_highest_possible_pfn, 0,
+			sizeof(arch_zone_highest_possible_pfn));
+	arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
+	arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
+	for (i = 1; i < MAX_NR_ZONES; i++) {
+		arch_zone_lowest_possible_pfn[i] =
+			arch_zone_highest_possible_pfn[i-1];
+		arch_zone_highest_possible_pfn[i] =
+			max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
+	}
+
+	/* Regions in the early_node_map can be in any order */
+	sort_node_map();
+
+	/* Print out the zone ranges */
+	printk("Zone PFN ranges:\n");
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		printk(" %-8s %8lu -> %8lu\n",
+				zone_names[i],
+				arch_zone_lowest_possible_pfn[i],
+				arch_zone_highest_possible_pfn[i]);
+
+	/* Print out the early_node_map[] */
+	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
+	for (i = 0; i < nr_nodemap_entries; i++)
+		printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid,
+				early_node_map[i].start_pfn,
+				early_node_map[i].end_pfn);
+
+	/* Initialise every node */
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		free_area_init_node(nid, pgdat, NULL,
+				find_min_pfn_for_node(nid), NULL);
+	}
+}
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
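
To close, a worked pass through the zone-boundary loop in free_area_init_nodes(), again with invented numbers: suppose the lowest registered PFN is 0 and max_zone_pfn = {0x1000, 0x100000, 0x400000} for DMA, DMA32 and NORMAL. The loop yields ZONE_DMA = [0, 0x1000), ZONE_DMA32 = [0x1000, 0x100000) and ZONE_NORMAL = [0x100000, 0x400000): each zone starts where the previous one ended. Had max_zone_pfn[ZONE_DMA32] also been 0x1000, ZONE_DMA32 would span [0x1000, 0x1000) and be treated as empty. The merge rules in add_active_range() are just as mechanical: registering [0, 0x1000) and then [0x1000, 0x2000) on the same node leaves a single entry [0, 0x2000), because the new range starts exactly at the existing end_pfn and so extends it forward.
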