aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Hansen <haveblue@us.ibm.com>2005-06-23 03:07:39 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-23 12:45:01 -0400
commit6f167ec721108c9282d54424516a12c805e3c306 (patch)
treef7094a2524611ede76b32e4cc3c07987b7b0e275
parentc2ebaa425e6630adcbf757b004d257dd4204925b (diff)
[PATCH] sparsemem base: simple NUMA remap space allocator
Introduce a simple allocator for the NUMA remap space. This space is very scarce and is used for structures that are best allocated node-local. This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep pgdat->node_mem_map initialized in a consistent place for all architectures. Known issue: alloc_remap() takes a node id where one might expect a pgdat. Taking a node id was intended to let us allocate the pgdats themselves through this mechanism, which we do not yet do. We could provide both alloc_remap_node() and alloc_remap_nid() for this purpose. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Dave Hansen <haveblue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/Kconfig5
-rw-r--r--arch/i386/mm/discontig.c59
-rw-r--r--include/linux/bootmem.h9
-rw-r--r--mm/page_alloc.c6
4 files changed, 50 insertions, 29 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index dfd904f6883..35ca3a17ed2 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE
803 depends on DISCONTIGMEM 803 depends on DISCONTIGMEM
804 default y 804 default y
805 805
806config HAVE_ARCH_ALLOC_REMAP
807 bool
808 depends on NUMA
809 default y
810
806config HIGHPTE 811config HIGHPTE
807 bool "Allocate 3rd-level pagetables from highmem" 812 bool "Allocate 3rd-level pagetables from highmem"
808 depends on HIGHMEM4G || HIGHMEM64G 813 depends on HIGHMEM4G || HIGHMEM64G
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 85d2fcbe107..dcc71f969b0 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
108void *node_remap_start_vaddr[MAX_NUMNODES]; 108void *node_remap_start_vaddr[MAX_NUMNODES];
109void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 109void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
110 110
111void *node_remap_end_vaddr[MAX_NUMNODES];
112void *node_remap_alloc_vaddr[MAX_NUMNODES];
113
111/* 114/*
112 * FLAT - support for basic PC memory model with discontig enabled, essentially 115 * FLAT - support for basic PC memory model with discontig enabled, essentially
113 * a single node with all available processors in it with a flat 116 * a single node with all available processors in it with a flat
@@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid)
178 } 181 }
179} 182}
180 183
184void *alloc_remap(int nid, unsigned long size)
185{
186 void *allocation = node_remap_alloc_vaddr[nid];
187
188 size = ALIGN(size, L1_CACHE_BYTES);
189
190 if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
191 return 0;
192
193 node_remap_alloc_vaddr[nid] += size;
194 memset(allocation, 0, size);
195
196 return allocation;
197}
198
181void __init remap_numa_kva(void) 199void __init remap_numa_kva(void)
182{ 200{
183 void *vaddr; 201 void *vaddr;
@@ -185,8 +203,6 @@ void __init remap_numa_kva(void)
185 int node; 203 int node;
186 204
187 for_each_online_node(node) { 205 for_each_online_node(node) {
188 if (node == 0)
189 continue;
190 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { 206 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
191 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); 207 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
192 set_pmd_pfn((ulong) vaddr, 208 set_pmd_pfn((ulong) vaddr,
@@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void)
202 unsigned long size, reserve_pages = 0; 218 unsigned long size, reserve_pages = 0;
203 219
204 for_each_online_node(nid) { 220 for_each_online_node(nid) {
205 if (nid == 0)
206 continue;
207 if (!node_remap_size[nid])
208 continue;
209
210 /* 221 /*
211 * The acpi/srat node info can show hot-add memory zones 222 * The acpi/srat node info can show hot-add memory zones
212 * where memory could be added but not currently present. 223 * where memory could be added but not currently present.
@@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void)
226 printk("Reserving %ld pages of KVA for lmem_map of node %d\n", 237 printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
227 size, nid); 238 size, nid);
228 node_remap_size[nid] = size; 239 node_remap_size[nid] = size;
229 reserve_pages += size;
230 node_remap_offset[nid] = reserve_pages; 240 node_remap_offset[nid] = reserve_pages;
241 reserve_pages += size;
231 printk("Shrinking node %d from %ld pages to %ld pages\n", 242 printk("Shrinking node %d from %ld pages to %ld pages\n",
232 nid, node_end_pfn[nid], node_end_pfn[nid] - size); 243 nid, node_end_pfn[nid], node_end_pfn[nid] - size);
233 node_end_pfn[nid] -= size; 244 node_end_pfn[nid] -= size;
@@ -280,12 +291,18 @@ unsigned long __init setup_memory(void)
280 (ulong) pfn_to_kaddr(max_low_pfn)); 291 (ulong) pfn_to_kaddr(max_low_pfn));
281 for_each_online_node(nid) { 292 for_each_online_node(nid) {
282 node_remap_start_vaddr[nid] = pfn_to_kaddr( 293 node_remap_start_vaddr[nid] = pfn_to_kaddr(
283 (highstart_pfn + reserve_pages) - node_remap_offset[nid]); 294 highstart_pfn + node_remap_offset[nid]);
295 /* Init the node remap allocator */
296 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
297 (node_remap_size[nid] * PAGE_SIZE);
298 node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
299 ALIGN(sizeof(pg_data_t), PAGE_SIZE);
300
284 allocate_pgdat(nid); 301 allocate_pgdat(nid);
285 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, 302 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
286 (ulong) node_remap_start_vaddr[nid], 303 (ulong) node_remap_start_vaddr[nid],
287 (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages 304 (ulong) pfn_to_kaddr(highstart_pfn
288 - node_remap_offset[nid] + node_remap_size[nid])); 305 + node_remap_offset[nid] + node_remap_size[nid]));
289 } 306 }
290 printk("High memory starts at vaddr %08lx\n", 307 printk("High memory starts at vaddr %08lx\n",
291 (ulong) pfn_to_kaddr(highstart_pfn)); 308 (ulong) pfn_to_kaddr(highstart_pfn));
@@ -348,23 +365,9 @@ void __init zone_sizes_init(void)
348 } 365 }
349 366
350 zholes_size = get_zholes_size(nid); 367 zholes_size = get_zholes_size(nid);
351 /* 368
352 * We let the lmem_map for node 0 be allocated from the 369 free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
353 * normal bootmem allocator, but other nodes come from the 370 zholes_size);
354 * remapped KVA area - mbligh
355 */
356 if (!nid)
357 free_area_init_node(nid, NODE_DATA(nid),
358 zones_size, start, zholes_size);
359 else {
360 unsigned long lmem_map;
361 lmem_map = (unsigned long)node_remap_start_vaddr[nid];
362 lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
363 lmem_map &= PAGE_MASK;
364 NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
365 free_area_init_node(nid, NODE_DATA(nid), zones_size,
366 start, zholes_size);
367 }
368 } 371 }
369 return; 372 return;
370} 373}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 0dd8ca1a3d5..500f451ce0c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
67 __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) 67 __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
68#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 68#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
69 69
70#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
71extern void *alloc_remap(int nid, unsigned long size);
72#else
/*
 * Fallback used when CONFIG_HAVE_ARCH_ALLOC_REMAP is not set: there is no
 * remap space to allocate from, so every request fails with NULL and the
 * caller is expected to fall back to another allocator. Both arguments
 * are ignored.
 */
73static inline void *alloc_remap(int nid, unsigned long size)
74{
75 return NULL;
76}
77#endif
78
70extern unsigned long __initdata nr_kernel_pages; 79extern unsigned long __initdata nr_kernel_pages;
71extern unsigned long __initdata nr_all_pages; 80extern unsigned long __initdata nr_all_pages;
72 81
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 559336de968..bf1dd881909 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
1936static void __init alloc_node_mem_map(struct pglist_data *pgdat) 1936static void __init alloc_node_mem_map(struct pglist_data *pgdat)
1937{ 1937{
1938 unsigned long size; 1938 unsigned long size;
1939 struct page *map;
1939 1940
1940 /* Skip empty nodes */ 1941 /* Skip empty nodes */
1941 if (!pgdat->node_spanned_pages) 1942 if (!pgdat->node_spanned_pages)
@@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
1944 /* ia64 gets its own node_mem_map, before this, without bootmem */ 1945 /* ia64 gets its own node_mem_map, before this, without bootmem */
1945 if (!pgdat->node_mem_map) { 1946 if (!pgdat->node_mem_map) {
1946 size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); 1947 size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
1947 pgdat->node_mem_map = alloc_bootmem_node(pgdat, size); 1948 map = alloc_remap(pgdat->node_id, size);
1949 if (!map)
1950 map = alloc_bootmem_node(pgdat, size);
1951 pgdat->node_mem_map = map;
1948 } 1952 }
1949#ifndef CONFIG_DISCONTIGMEM 1953#ifndef CONFIG_DISCONTIGMEM
1950 /* 1954 /*