author     Dave Hansen <haveblue@us.ibm.com>          2005-06-23 03:07:39 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-23 12:45:01 -0400
commit     6f167ec721108c9282d54424516a12c805e3c306
tree       f7094a2524611ede76b32e4cc3c07987b7b0e275
parent     c2ebaa425e6630adcbf757b004d257dd4204925b
[PATCH] sparsemem base: simple NUMA remap space allocator
Introduce a simple allocator for the NUMA remap space.  This space is very
scarce and is used for structures which are best allocated node-local.
This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep
the pgdat->node_mem_map initialized in a consistent place for all
architectures.
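For reviewers, the intended calling convention is the one this patch uses in
alloc_node_mem_map(): try the node-local remap space first and fall back to
bootmem when alloc_remap() returns NULL (architectures without
HAVE_ARCH_ALLOC_REMAP always take the fallback via the static inline stub).
A minimal sketch of that pattern, condensed from the patch; the function name
example_alloc_mem_map() is illustrative only:

	static void __init example_alloc_mem_map(pg_data_t *pgdat)
	{
		unsigned long size;
		struct page *map;

		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);

		/* node-local remap space first ... */
		map = alloc_remap(pgdat->node_id, size);

		/* ... otherwise the ordinary bootmem allocator */
		if (!map)
			map = alloc_bootmem_node(pgdat, size);

		pgdat->node_mem_map = map;
	}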
Issues:
o alloc_remap() takes a node_id where we might expect a pgdat.  This was
intended to allow the pgdats themselves to be allocated with this mechanism,
which we do not yet do.  We could add alloc_remap_node() and alloc_remap_nid()
for this purpose.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  arch/i386/Kconfig        |  5
-rw-r--r--  arch/i386/mm/discontig.c | 59
-rw-r--r--  include/linux/bootmem.h  |  9
-rw-r--r--  mm/page_alloc.c          |  6
4 files changed, 50 insertions, 29 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index dfd904f6883b..35ca3a17ed20 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE
 	depends on DISCONTIGMEM
 	default y
 
+config HAVE_ARCH_ALLOC_REMAP
+	bool
+	depends on NUMA
+	default y
+
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
 	depends on HIGHMEM4G || HIGHMEM64G
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 85d2fcbe1079..dcc71f969b01 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
 void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
+void *node_remap_end_vaddr[MAX_NUMNODES];
+void *node_remap_alloc_vaddr[MAX_NUMNODES];
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *	a single node with all available processors in it with a flat
@@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid)
 	}
 }
 
+void *alloc_remap(int nid, unsigned long size)
+{
+	void *allocation = node_remap_alloc_vaddr[nid];
+
+	size = ALIGN(size, L1_CACHE_BYTES);
+
+	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+		return 0;
+
+	node_remap_alloc_vaddr[nid] += size;
+	memset(allocation, 0, size);
+
+	return allocation;
+}
+
 void __init remap_numa_kva(void)
 {
 	void *vaddr;
@@ -185,8 +203,6 @@ void __init remap_numa_kva(void)
 	int node;
 
 	for_each_online_node(node) {
-		if (node == 0)
-			continue;
 		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
 			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
 			set_pmd_pfn((ulong) vaddr,
@@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void)
 	unsigned long size, reserve_pages = 0;
 
 	for_each_online_node(nid) {
-		if (nid == 0)
-			continue;
-		if (!node_remap_size[nid])
-			continue;
-
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void)
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
-		reserve_pages += size;
 		node_remap_offset[nid] = reserve_pages;
+		reserve_pages += size;
 		printk("Shrinking node %d from %ld pages to %ld pages\n",
 			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
 		node_end_pfn[nid] -= size;
@@ -280,12 +291,18 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
+				highstart_pfn + node_remap_offset[nid]);
+		/* Init the node remap allocator */
+		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+			(node_remap_size[nid] * PAGE_SIZE);
+		node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
-			    - node_remap_offset[nid] + node_remap_size[nid]));
+			(ulong) pfn_to_kaddr(highstart_pfn
+			   + node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
@@ -348,23 +365,9 @@ void __init zone_sizes_init(void)
 		}
 
 		zholes_size = get_zholes_size(nid);
-		/*
-		 * We let the lmem_map for node 0 be allocated from the
-		 * normal bootmem allocator, but other nodes come from the
-		 * remapped KVA area - mbligh
-		 */
-		if (!nid)
-			free_area_init_node(nid, NODE_DATA(nid),
-					zones_size, start, zholes_size);
-		else {
-			unsigned long lmem_map;
-			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
-			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
-			lmem_map &= PAGE_MASK;
-			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
-			free_area_init_node(nid, NODE_DATA(nid), zones_size,
-				start, zholes_size);
-		}
+
+		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
+				zholes_size);
 	}
 	return;
 }
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 0dd8ca1a3d5a..500f451ce0c0 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
+extern void *alloc_remap(int nid, unsigned long size);
+#else
+static inline void *alloc_remap(int nid, unsigned long size)
+{
+	return NULL;
+}
+#endif
+
 extern unsigned long __initdata nr_kernel_pages;
 extern unsigned long __initdata nr_all_pages;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 559336de9687..bf1dd8819097 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long size;
+	struct page *map;
 
 	/* Skip empty nodes */
 	if (!pgdat->node_spanned_pages)
@@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 	/* ia64 gets its own node_mem_map, before this, without bootmem */
 	if (!pgdat->node_mem_map) {
 		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-		pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+		map = alloc_remap(pgdat->node_id, size);
+		if (!map)
+			map = alloc_bootmem_node(pgdat, size);
+		pgdat->node_mem_map = map;
 	}
 #ifndef CONFIG_DISCONTIGMEM
 	/*