diff options
Diffstat (limited to 'arch/i386/mm/discontig.c')
-rw-r--r-- | arch/i386/mm/discontig.c | 127 |
1 files changed, 83 insertions, 44 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 1726b4096b10..f429c871e845 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c | |||
@@ -29,12 +29,14 @@ | |||
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/initrd.h> | 30 | #include <linux/initrd.h> |
31 | #include <linux/nodemask.h> | 31 | #include <linux/nodemask.h> |
32 | #include <linux/module.h> | ||
32 | #include <asm/e820.h> | 33 | #include <asm/e820.h> |
33 | #include <asm/setup.h> | 34 | #include <asm/setup.h> |
34 | #include <asm/mmzone.h> | 35 | #include <asm/mmzone.h> |
35 | #include <bios_ebda.h> | 36 | #include <bios_ebda.h> |
36 | 37 | ||
37 | struct pglist_data *node_data[MAX_NUMNODES]; | 38 | struct pglist_data *node_data[MAX_NUMNODES]; |
39 | EXPORT_SYMBOL(node_data); | ||
38 | bootmem_data_t node0_bdata; | 40 | bootmem_data_t node0_bdata; |
39 | 41 | ||
40 | /* | 42 | /* |
@@ -42,12 +44,16 @@ bootmem_data_t node0_bdata; | |||
42 | * populated the following initialisation. | 44 | * populated the following initialisation. |
43 | * | 45 | * |
44 | * 1) node_online_map - the map of all nodes configured (online) in the system | 46 | * 1) node_online_map - the map of all nodes configured (online) in the system |
45 | * 2) physnode_map - the mapping between a pfn and owning node | 47 | * 2) node_start_pfn - the starting page frame number for a node |
46 | * 3) node_start_pfn - the starting page frame number for a node | ||
47 | * 3) node_end_pfn - the ending page frame number for a node | 48 | * 3) node_end_pfn - the ending page frame number for a node |
48 | */ | 49 | */ |
50 | unsigned long node_start_pfn[MAX_NUMNODES]; | ||
51 | unsigned long node_end_pfn[MAX_NUMNODES]; | ||
52 | |||
49 | 53 | ||
54 | #ifdef CONFIG_DISCONTIGMEM | ||
50 | /* | 55 | /* |
56 | * 4) physnode_map - the mapping between a pfn and owning node | ||
51 | * physnode_map keeps track of the physical memory layout of a generic | 57 | * physnode_map keeps track of the physical memory layout of a generic |
52 | * numa node on a 256Mb break (each element of the array will | 58 | * numa node on a 256Mb break (each element of the array will |
53 | * represent 256Mb of memory and will be marked by the node id. so, | 59 | * represent 256Mb of memory and will be marked by the node id. so, |
@@ -59,6 +65,7 @@ bootmem_data_t node0_bdata; | |||
59 | * physnode_map[8- ] = -1; | 65 | * physnode_map[8- ] = -1; |
60 | */ | 66 | */ |
61 | s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 67 | s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; |
68 | EXPORT_SYMBOL(physnode_map); | ||
62 | 69 | ||
63 | void memory_present(int nid, unsigned long start, unsigned long end) | 70 | void memory_present(int nid, unsigned long start, unsigned long end) |
64 | { | 71 | { |
@@ -85,9 +92,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
85 | 92 | ||
86 | return (nr_pages + 1) * sizeof(struct page); | 93 | return (nr_pages + 1) * sizeof(struct page); |
87 | } | 94 | } |
88 | 95 | #endif | |
89 | unsigned long node_start_pfn[MAX_NUMNODES]; | ||
90 | unsigned long node_end_pfn[MAX_NUMNODES]; | ||
91 | 96 | ||
92 | extern unsigned long find_max_low_pfn(void); | 97 | extern unsigned long find_max_low_pfn(void); |
93 | extern void find_max_pfn(void); | 98 | extern void find_max_pfn(void); |
@@ -108,6 +113,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; | |||
108 | void *node_remap_start_vaddr[MAX_NUMNODES]; | 113 | void *node_remap_start_vaddr[MAX_NUMNODES]; |
109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 114 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
110 | 115 | ||
116 | void *node_remap_end_vaddr[MAX_NUMNODES]; | ||
117 | void *node_remap_alloc_vaddr[MAX_NUMNODES]; | ||
118 | |||
111 | /* | 119 | /* |
112 | * FLAT - support for basic PC memory model with discontig enabled, essentially | 120 | * FLAT - support for basic PC memory model with discontig enabled, essentially |
113 | * a single node with all available processors in it with a flat | 121 | * a single node with all available processors in it with a flat |
@@ -146,6 +154,21 @@ static void __init find_max_pfn_node(int nid) | |||
146 | BUG(); | 154 | BUG(); |
147 | } | 155 | } |
148 | 156 | ||
157 | /* Find the owning node for a pfn. */ | ||
158 | int early_pfn_to_nid(unsigned long pfn) | ||
159 | { | ||
160 | int nid; | ||
161 | |||
162 | for_each_node(nid) { | ||
163 | if (node_end_pfn[nid] == 0) | ||
164 | break; | ||
165 | if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) | ||
166 | return nid; | ||
167 | } | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
149 | /* | 172 | /* |
150 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem | 173 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem |
151 | * method. For node zero take this from the bottom of memory, for | 174 | * method. For node zero take this from the bottom of memory, for |
@@ -163,6 +186,21 @@ static void __init allocate_pgdat(int nid) | |||
163 | } | 186 | } |
164 | } | 187 | } |
165 | 188 | ||
189 | void *alloc_remap(int nid, unsigned long size) | ||
190 | { | ||
191 | void *allocation = node_remap_alloc_vaddr[nid]; | ||
192 | |||
193 | size = ALIGN(size, L1_CACHE_BYTES); | ||
194 | |||
195 | if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) | ||
196 | return 0; | ||
197 | |||
198 | node_remap_alloc_vaddr[nid] += size; | ||
199 | memset(allocation, 0, size); | ||
200 | |||
201 | return allocation; | ||
202 | } | ||
203 | |||
166 | void __init remap_numa_kva(void) | 204 | void __init remap_numa_kva(void) |
167 | { | 205 | { |
168 | void *vaddr; | 206 | void *vaddr; |
@@ -170,8 +208,6 @@ void __init remap_numa_kva(void) | |||
170 | int node; | 208 | int node; |
171 | 209 | ||
172 | for_each_online_node(node) { | 210 | for_each_online_node(node) { |
173 | if (node == 0) | ||
174 | continue; | ||
175 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | 211 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { |
176 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | 212 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); |
177 | set_pmd_pfn((ulong) vaddr, | 213 | set_pmd_pfn((ulong) vaddr, |
@@ -185,13 +221,9 @@ static unsigned long calculate_numa_remap_pages(void) | |||
185 | { | 221 | { |
186 | int nid; | 222 | int nid; |
187 | unsigned long size, reserve_pages = 0; | 223 | unsigned long size, reserve_pages = 0; |
224 | unsigned long pfn; | ||
188 | 225 | ||
189 | for_each_online_node(nid) { | 226 | for_each_online_node(nid) { |
190 | if (nid == 0) | ||
191 | continue; | ||
192 | if (!node_remap_size[nid]) | ||
193 | continue; | ||
194 | |||
195 | /* | 227 | /* |
196 | * The acpi/srat node info can show hot-add memory zones | 228 | * where memory could be added but not currently present. |
197 | * where memory could be added but not currently present. | 229 | * where memory could be added but not currently present. |
@@ -208,11 +240,24 @@ static unsigned long calculate_numa_remap_pages(void) | |||
208 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; | 240 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; |
209 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 241 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
210 | size = size * PTRS_PER_PTE; | 242 | size = size * PTRS_PER_PTE; |
243 | |||
244 | /* | ||
245 | * Validate the region we are allocating only contains valid | ||
246 | * pages. | ||
247 | */ | ||
248 | for (pfn = node_end_pfn[nid] - size; | ||
249 | pfn < node_end_pfn[nid]; pfn++) | ||
250 | if (!page_is_ram(pfn)) | ||
251 | break; | ||
252 | |||
253 | if (pfn != node_end_pfn[nid]) | ||
254 | size = 0; | ||
255 | |||
211 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", | 256 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", |
212 | size, nid); | 257 | size, nid); |
213 | node_remap_size[nid] = size; | 258 | node_remap_size[nid] = size; |
214 | reserve_pages += size; | ||
215 | node_remap_offset[nid] = reserve_pages; | 259 | node_remap_offset[nid] = reserve_pages; |
260 | reserve_pages += size; | ||
216 | printk("Shrinking node %d from %ld pages to %ld pages\n", | 261 | printk("Shrinking node %d from %ld pages to %ld pages\n", |
217 | nid, node_end_pfn[nid], node_end_pfn[nid] - size); | 262 | nid, node_end_pfn[nid], node_end_pfn[nid] - size); |
218 | node_end_pfn[nid] -= size; | 263 | node_end_pfn[nid] -= size; |
@@ -265,12 +310,18 @@ unsigned long __init setup_memory(void) | |||
265 | (ulong) pfn_to_kaddr(max_low_pfn)); | 310 | (ulong) pfn_to_kaddr(max_low_pfn)); |
266 | for_each_online_node(nid) { | 311 | for_each_online_node(nid) { |
267 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 312 | node_remap_start_vaddr[nid] = pfn_to_kaddr( |
268 | (highstart_pfn + reserve_pages) - node_remap_offset[nid]); | 313 | highstart_pfn + node_remap_offset[nid]); |
314 | /* Init the node remap allocator */ | ||
315 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
316 | (node_remap_size[nid] * PAGE_SIZE); | ||
317 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
318 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
319 | |||
269 | allocate_pgdat(nid); | 320 | allocate_pgdat(nid); |
270 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, | 321 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, |
271 | (ulong) node_remap_start_vaddr[nid], | 322 | (ulong) node_remap_start_vaddr[nid], |
272 | (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages | 323 | (ulong) pfn_to_kaddr(highstart_pfn |
273 | - node_remap_offset[nid] + node_remap_size[nid])); | 324 | + node_remap_offset[nid] + node_remap_size[nid])); |
274 | } | 325 | } |
275 | printk("High memory starts at vaddr %08lx\n", | 326 | printk("High memory starts at vaddr %08lx\n", |
276 | (ulong) pfn_to_kaddr(highstart_pfn)); | 327 | (ulong) pfn_to_kaddr(highstart_pfn)); |
@@ -333,23 +384,9 @@ void __init zone_sizes_init(void) | |||
333 | } | 384 | } |
334 | 385 | ||
335 | zholes_size = get_zholes_size(nid); | 386 | zholes_size = get_zholes_size(nid); |
336 | /* | 387 | |
337 | * We let the lmem_map for node 0 be allocated from the | 388 | free_area_init_node(nid, NODE_DATA(nid), zones_size, start, |
338 | * normal bootmem allocator, but other nodes come from the | 389 | zholes_size); |
339 | * remapped KVA area - mbligh | ||
340 | */ | ||
341 | if (!nid) | ||
342 | free_area_init_node(nid, NODE_DATA(nid), | ||
343 | zones_size, start, zholes_size); | ||
344 | else { | ||
345 | unsigned long lmem_map; | ||
346 | lmem_map = (unsigned long)node_remap_start_vaddr[nid]; | ||
347 | lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; | ||
348 | lmem_map &= PAGE_MASK; | ||
349 | NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; | ||
350 | free_area_init_node(nid, NODE_DATA(nid), zones_size, | ||
351 | start, zholes_size); | ||
352 | } | ||
353 | } | 390 | } |
354 | return; | 391 | return; |
355 | } | 392 | } |
@@ -358,24 +395,26 @@ void __init set_highmem_pages_init(int bad_ppro) | |||
358 | { | 395 | { |
359 | #ifdef CONFIG_HIGHMEM | 396 | #ifdef CONFIG_HIGHMEM |
360 | struct zone *zone; | 397 | struct zone *zone; |
398 | struct page *page; | ||
361 | 399 | ||
362 | for_each_zone(zone) { | 400 | for_each_zone(zone) { |
363 | unsigned long node_pfn, node_high_size, zone_start_pfn; | 401 | unsigned long node_pfn, zone_start_pfn, zone_end_pfn; |
364 | struct page * zone_mem_map; | 402 | |
365 | |||
366 | if (!is_highmem(zone)) | 403 | if (!is_highmem(zone)) |
367 | continue; | 404 | continue; |
368 | 405 | ||
369 | printk("Initializing %s for node %d\n", zone->name, | ||
370 | zone->zone_pgdat->node_id); | ||
371 | |||
372 | node_high_size = zone->spanned_pages; | ||
373 | zone_mem_map = zone->zone_mem_map; | ||
374 | zone_start_pfn = zone->zone_start_pfn; | 406 | zone_start_pfn = zone->zone_start_pfn; |
407 | zone_end_pfn = zone_start_pfn + zone->spanned_pages; | ||
408 | |||
409 | printk("Initializing %s for node %d (%08lx:%08lx)\n", | ||
410 | zone->name, zone->zone_pgdat->node_id, | ||
411 | zone_start_pfn, zone_end_pfn); | ||
375 | 412 | ||
376 | for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { | 413 | for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { |
377 | one_highpage_init((struct page *)(zone_mem_map + node_pfn), | 414 | if (!pfn_valid(node_pfn)) |
378 | zone_start_pfn + node_pfn, bad_ppro); | 415 | continue; |
416 | page = pfn_to_page(node_pfn); | ||
417 | one_highpage_init(page, node_pfn, bad_ppro); | ||
379 | } | 418 | } |
380 | } | 419 | } |
381 | totalram_pages += totalhigh_pages; | 420 | totalram_pages += totalhigh_pages; |