Diffstat (limited to 'arch/i386/mm/discontig.c')
-rw-r--r--  arch/i386/mm/discontig.c | 127
1 file changed, 83 insertions(+), 44 deletions(-)
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 1726b4096b10..f429c871e845 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -29,12 +29,14 @@
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/nodemask.h>
+#include <linux/module.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/mmzone.h>
 #include <bios_ebda.h>
 
 struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
 bootmem_data_t node0_bdata;
 
 /*
@@ -42,12 +44,16 @@ bootmem_data_t node0_bdata;
  * populated the following initialisation.
  *
  * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) physnode_map     - the mapping between a pfn and owning node
- * 3) node_start_pfn   - the starting page frame number for a node
+ * 2) node_start_pfn   - the starting page frame number for a node
  * 3) node_end_pfn     - the ending page fram number for a node
  */
+unsigned long node_start_pfn[MAX_NUMNODES];
+unsigned long node_end_pfn[MAX_NUMNODES];
+
 
+#ifdef CONFIG_DISCONTIGMEM
 /*
+ * 4) physnode_map     - the mapping between a pfn and owning node
  * physnode_map keeps track of the physical memory layout of a generic
  * numa node on a 256Mb break (each element of the array will
  * represent 256Mb of memory and will be marked by the node id.  so,
@@ -59,6 +65,7 @@ bootmem_data_t node0_bdata;
  * physnode_map[8- ] = -1;
  */
 s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+EXPORT_SYMBOL(physnode_map);
 
 void memory_present(int nid, unsigned long start, unsigned long end)
 {
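
The physnode_map array above gives the owning node for any pfn in O(1) by keying on 256 MB chunks of physical address space. Below is a minimal stand-alone sketch of that lookup, assuming 4 KiB pages; every demo_* name and the sample node layout are invented for illustration (the range-initializer syntax is the same GCC extension the kernel line above uses):

#include <stdio.h>

#define PAGE_SHIFT_DEMO		12			/* 4 KiB pages */
#define ELEMENT_BYTES_DEMO	(256UL << 20)		/* 256 MiB per slot */
#define PAGES_PER_ELEMENT_DEMO	(ELEMENT_BYTES_DEMO >> PAGE_SHIFT_DEMO)
#define MAX_ELEMENTS_DEMO	16			/* covers 4 GiB */

/* One slot per 256 MiB chunk of physical memory; -1 = no owning node. */
static signed char demo_physnode_map[MAX_ELEMENTS_DEMO] = {
	[0 ... MAX_ELEMENTS_DEMO - 1] = -1
};

static int demo_pfn_to_nid(unsigned long pfn)
{
	return demo_physnode_map[pfn / PAGES_PER_ELEMENT_DEMO];
}

int main(void)
{
	/* Pretend node 0 owns 0-512 MiB and node 1 owns 512 MiB-1 GiB. */
	demo_physnode_map[0] = demo_physnode_map[1] = 0;
	demo_physnode_map[2] = demo_physnode_map[3] = 1;

	printf("pfn 0x10000 (256 MiB) -> node %d\n", demo_pfn_to_nid(0x10000));
	printf("pfn 0x30000 (768 MiB) -> node %d\n", demo_pfn_to_nid(0x30000));
	return 0;
}
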
@@ -85,9 +92,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 	return (nr_pages + 1) * sizeof(struct page);
 }
-
-unsigned long node_start_pfn[MAX_NUMNODES];
-unsigned long node_end_pfn[MAX_NUMNODES];
+#endif
 
 extern unsigned long find_max_low_pfn(void);
 extern void find_max_pfn(void);
@@ -108,6 +113,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
 void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
+void *node_remap_end_vaddr[MAX_NUMNODES];
+void *node_remap_alloc_vaddr[MAX_NUMNODES];
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
@@ -146,6 +154,21 @@ static void __init find_max_pfn_node(int nid)
 		BUG();
 }
 
+/* Find the owning node for a pfn. */
+int early_pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	for_each_node(nid) {
+		if (node_end_pfn[nid] == 0)
+			break;
+		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
+			return nid;
+	}
+
+	return 0;
+}
+
 /*
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
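
The new early_pfn_to_nid() does a linear scan of the node tables, stopping at the first node with no recorded end pfn and falling back to node 0 when nothing matches; note that the end bound is inclusive (>=). A stand-alone sketch with for_each_node() reduced to a plain loop and a made-up two-node table:

#include <stdio.h>

#define DEMO_MAX_NUMNODES 8

static unsigned long demo_start_pfn[DEMO_MAX_NUMNODES] = { 0, 0x40000 };
static unsigned long demo_end_pfn[DEMO_MAX_NUMNODES]   = { 0x40000, 0x80000 };

static int demo_early_pfn_to_nid(unsigned long pfn)
{
	int nid;

	for (nid = 0; nid < DEMO_MAX_NUMNODES; nid++) {
		if (demo_end_pfn[nid] == 0)	/* unpopulated: stop scanning */
			break;
		if (demo_start_pfn[nid] <= pfn && demo_end_pfn[nid] >= pfn)
			return nid;
	}
	return 0;			/* fallback, as in the kernel function */
}

int main(void)
{
	printf("pfn 0x20000 -> node %d\n", demo_early_pfn_to_nid(0x20000));
	printf("pfn 0x50000 -> node %d\n", demo_early_pfn_to_nid(0x50000));
	return 0;
}
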
@@ -163,6 +186,21 @@ static void __init allocate_pgdat(int nid)
 	}
 }
 
+void *alloc_remap(int nid, unsigned long size)
+{
+	void *allocation = node_remap_alloc_vaddr[nid];
+
+	size = ALIGN(size, L1_CACHE_BYTES);
+
+	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+		return 0;
+
+	node_remap_alloc_vaddr[nid] += size;
+	memset(allocation, 0, size);
+
+	return allocation;
+}
+
 void __init remap_numa_kva(void)
 {
 	void *vaddr;
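
alloc_remap() is a bump allocator over each node's remap window: round the request up to a cache line, return the current cursor, advance it, and fail when the node has no window (NULL cursor) or the window would be overrun. The same pattern in a user-space sketch over a static arena; the 4 KiB arena size and 32-byte alignment are arbitrary stand-ins:

#include <stdio.h>
#include <string.h>

#define DEMO_ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define DEMO_CACHELINE		32
#define DEMO_ARENA_BYTES	4096

static char demo_arena[DEMO_ARENA_BYTES];
static char *demo_cursor = demo_arena;
static char *demo_end = demo_arena + DEMO_ARENA_BYTES;

static void *demo_bump_alloc(unsigned long size)
{
	void *allocation = demo_cursor;

	size = DEMO_ALIGN(size, DEMO_CACHELINE);

	/* No free(): allocations live until the whole arena is discarded. */
	if (demo_cursor + size >= demo_end)
		return NULL;

	demo_cursor += size;
	memset(allocation, 0, size);
	return allocation;
}

int main(void)
{
	void *a = demo_bump_alloc(100);	/* rounded up to 128 bytes */
	void *b = demo_bump_alloc(10);	/* rounded up to 32 bytes */

	printf("a=%p b=%p gap=%ld\n", a, b, (long)((char *)b - (char *)a));
	return 0;
}
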
@@ -170,8 +208,6 @@ void __init remap_numa_kva(void)
 	int node;
 
 	for_each_online_node(node) {
-		if (node == 0)
-			continue;
 		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
 			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
 			set_pmd_pfn((ulong) vaddr,
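
With the node 0 special case removed, the loop above now runs for every online node, stepping pfn by PTRS_PER_PTE so that each set_pmd_pfn() call installs one large-page mapping. A small sketch of the stride arithmetic, using assumed non-PAE i386 values (PTRS_PER_PTE = 1024, 4 KiB pages):

#include <stdio.h>

int main(void)
{
	unsigned long ptrs_per_pte = 1024;	/* assumed non-PAE i386 */
	unsigned long page_size = 4096;
	unsigned long remap_pages = 3072;	/* a 12 MiB remap window */
	unsigned long pfn, pmds = 0;

	for (pfn = 0; pfn < remap_pages; pfn += ptrs_per_pte)
		pmds++;		/* one large-page mapping per iteration */

	printf("%lu pages -> %lu pmd mappings of %lu MiB each\n",
	       remap_pages, pmds, ptrs_per_pte * page_size >> 20);
	return 0;
}
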
@@ -185,13 +221,9 @@ static unsigned long calculate_numa_remap_pages(void)
 {
 	int nid;
 	unsigned long size, reserve_pages = 0;
+	unsigned long pfn;
 
 	for_each_online_node(nid) {
-		if (nid == 0)
-			continue;
-		if (!node_remap_size[nid])
-			continue;
-
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -208,11 +240,24 @@ static unsigned long calculate_numa_remap_pages(void)
 		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
 		/* now the roundup is correct, convert to PAGE_SIZE pages */
 		size = size * PTRS_PER_PTE;
+
+		/*
+		 * Validate the region we are allocating only contains valid
+		 * pages.
+		 */
+		for (pfn = node_end_pfn[nid] - size;
+		     pfn < node_end_pfn[nid]; pfn++)
+			if (!page_is_ram(pfn))
+				break;
+
+		if (pfn != node_end_pfn[nid])
+			size = 0;
+
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
-		reserve_pages += size;
 		node_remap_offset[nid] = reserve_pages;
+		reserve_pages += size;
 		printk("Shrinking node %d from %ld pages to %ld pages\n",
 			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
 		node_end_pfn[nid] -= size;
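
Two things change above: the freshly computed window is validated with page_is_ram() and dropped (size = 0) if it would cover a hole, and node_remap_offset[nid] is now recorded before reserve_pages is advanced, so each node's offset points at the start of its own window rather than its end. The roundup itself converts a byte count into whole large pages and back into 4 KiB pages; a worked example with assumed non-PAE constants:

#include <stdio.h>

int main(void)
{
	unsigned long large_page_bytes = 4UL << 20;	/* 4 MiB pmd mapping */
	unsigned long ptrs_per_pte = 1024;		/* 4 KiB pages per pmd */
	unsigned long size = 18UL * 1024 * 1024 + 123;	/* mem_map bytes, say */

	/* Round up to whole large pages... */
	size = (size + large_page_bytes - 1) / large_page_bytes;
	printf("%lu large pages\n", size);		/* prints 5 */

	/* ...then express the reservation in 4 KiB pages. */
	size = size * ptrs_per_pte;
	printf("%lu small pages reserved\n", size);	/* prints 5120 */
	return 0;
}
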
@@ -265,12 +310,18 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
+				highstart_pfn + node_remap_offset[nid]);
+		/* Init the node remap allocator */
+		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+			(node_remap_size[nid] * PAGE_SIZE);
+		node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
-			    - node_remap_offset[nid] + node_remap_size[nid]));
+			(ulong) pfn_to_kaddr(highstart_pfn
+			     + node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
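
The remap windows are now laid out bottom-up from highstart_pfn: node nid's window begins at node_remap_offset[nid], the pg_data_t sits at the bottom of the window, and the bump allocator's cursor starts on the next page boundary. A sketch of that layout computation with invented sizes (the pg_data_t size here is assumed, not the real structure's):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long pgdat_bytes = 3000;	/* assumed sizeof(pg_data_t) */
	unsigned long size[2] = { 5120, 2048 };	/* remap pages per node */
	unsigned long offset = 0, nid;

	for (nid = 0; nid < 2; nid++) {
		unsigned long start = offset * page_size;	/* window base */
		unsigned long end = start + size[nid] * page_size;
		unsigned long alloc = start +
			((pgdat_bytes + page_size - 1) & ~(page_size - 1));

		printf("node %lu: window %#lx-%#lx, allocator starts %#lx\n",
		       nid, start, end, alloc);
		offset += size[nid];	/* next node's window follows */
	}
	return 0;
}
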
@@ -333,23 +384,9 @@ void __init zone_sizes_init(void)
 	}
 
 		zholes_size = get_zholes_size(nid);
-		/*
-		 * We let the lmem_map for node 0 be allocated from the
-		 * normal bootmem allocator, but other nodes come from the
-		 * remapped KVA area - mbligh
-		 */
-		if (!nid)
-			free_area_init_node(nid, NODE_DATA(nid),
-				zones_size, start, zholes_size);
-		else {
-			unsigned long lmem_map;
-			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
-			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
-			lmem_map &= PAGE_MASK;
-			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
-			free_area_init_node(nid, NODE_DATA(nid), zones_size,
-				start, zholes_size);
-		}
+
+		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
+				zholes_size);
 	}
 	return;
 }
@@ -358,24 +395,26 @@ void __init set_highmem_pages_init(int bad_ppro)
 {
 #ifdef CONFIG_HIGHMEM
 	struct zone *zone;
+	struct page *page;
 
 	for_each_zone(zone) {
-		unsigned long node_pfn, node_high_size, zone_start_pfn;
-		struct page * zone_mem_map;
-
+		unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
+
 		if (!is_highmem(zone))
 			continue;
 
-		printk("Initializing %s for node %d\n", zone->name,
-			zone->zone_pgdat->node_id);
-
-		node_high_size = zone->spanned_pages;
-		zone_mem_map = zone->zone_mem_map;
 		zone_start_pfn = zone->zone_start_pfn;
+		zone_end_pfn = zone_start_pfn + zone->spanned_pages;
+
+		printk("Initializing %s for node %d (%08lx:%08lx)\n",
+				zone->name, zone->zone_pgdat->node_id,
+				zone_start_pfn, zone_end_pfn);
 
-		for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) {
-			one_highpage_init((struct page *)(zone_mem_map + node_pfn),
-				zone_start_pfn + node_pfn, bad_ppro);
+		for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
+			if (!pfn_valid(node_pfn))
+				continue;
+			page = pfn_to_page(node_pfn);
+			one_highpage_init(page, node_pfn, bad_ppro);
 		}
 	}
 	totalram_pages += totalhigh_pages;
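
The rewritten highmem loop walks pfns between the zone bounds and filters with pfn_valid(), instead of indexing a flat zone_mem_map that no longer exists as a contiguous array under sparse memory layouts. A user-space sketch of the same walk, with a made-up hole standing in for an invalid pfn range:

#include <stdio.h>

/* Pretend pfns 0x400-0x4ff are a hole (e.g. an unpopulated section). */
static int demo_pfn_valid(unsigned long pfn)
{
	return pfn < 0x400 || pfn > 0x4ff;
}

int main(void)
{
	unsigned long start = 0x380, end = 0x540, pfn, inited = 0;

	for (pfn = start; pfn < end; pfn++) {
		if (!demo_pfn_valid(pfn))
			continue;	/* skip holes instead of touching them */
		inited++;		/* stands in for one_highpage_init() */
	}
	printf("initialised %lu of %lu pfns\n", inited, end - start);
	return 0;
}
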