author    Mel Gorman <mel@csn.ul.ie>  2006-09-27 04:49:56 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-09-27 11:26:11 -0400
commit    0e0b864e069c52a7b3e4a7da56e29b03a012fd75 (patch)
tree      e3fcfd997ef912ed3c61cb2b5c2ca57a7f45ec58
parent    05e0caad3b7bd0d0fbeff980bca22f186241a501 (diff)
[PATCH] Account for memmap and optionally the kernel image as holes
The x86_64 code accounted for memmap and some portions of the DMA zone as
holes.  This was because those areas would never be reclaimed and accounting
for them as memory affects min watermarks.  This patch will account for the
memmap as a memory hole.  Architectures may optionally use set_dma_reserve()
if they wish to account for a portion of memory in ZONE_DMA as a hole.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
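
To make the accounting concrete: for a zone spanning `size` pages, the patch
charges (size * sizeof(struct page)) >> PAGE_SHIFT pages of realsize to the
zone's own memmap.  The standalone C sketch below mirrors that arithmetic;
the struct page size and zone size are stand-in values, since the real
sizeof(struct page) depends on kernel configuration.

/*
 * Illustrative userspace sketch of the memmap accounting this patch
 * adds to free_area_init_core().  STRUCT_PAGE_SZ is a stand-in value.
 */
#include <stdio.h>

#define PAGE_SHIFT      12              /* 4 KiB pages */
#define STRUCT_PAGE_SZ  56              /* assumed sizeof(struct page) */

int main(void)
{
        unsigned long size = 262144;    /* zone spans 1 GiB of 4 KiB pages */
        unsigned long realsize = size;  /* assume no holes for simplicity */

        /* Same arithmetic as the patch: pages consumed by the zone's memmap */
        unsigned long memmap_pages = (size * STRUCT_PAGE_SZ) >> PAGE_SHIFT;

        if (realsize >= memmap_pages)
                realsize -= memmap_pages;

        printf("memmap consumes %lu pages; realsize drops %lu -> %lu\n",
               memmap_pages, size, realsize);
        return 0;
}

For the values above this prints 3584 pages, i.e. the memmap for a 1 GiB
zone costs roughly 14 MiB that can never be reclaimed.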
-rw-r--r--  arch/x86_64/mm/init.c   4
-rw-r--r--  include/linux/mm.h      1
-rw-r--r--  mm/page_alloc.c        60
3 files changed, 63 insertions, 2 deletions
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 47928399e38a..3e16fe08150e 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 #else
 	reserve_bootmem(phys, len);
 #endif
-	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
+		set_dma_reserve(dma_reserve);
+	}
 }
 
 int kern_addr_valid(unsigned long addr)
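
The pattern above: each call to reserve_bootmem_generic() that lands below
MAX_DMA_PFN adds the reserved pages to the architecture's running
dma_reserve tally and republishes the total via set_dma_reserve().  A
hypothetical sketch of the same pattern for another architecture follows;
the function and variable names are invented for illustration, not a real
port.

/* Hypothetical arch boot code: names are illustrative only. */
static unsigned long my_dma_reserve;	/* pages reserved below the DMA limit */

static void my_reserve_bootmem(unsigned long phys, unsigned long len)
{
	reserve_bootmem(phys, len);

	/* Track unfreeable pages in ZONE_DMA and republish the total */
	if (phys + len <= MAX_DMA_PFN * PAGE_SIZE) {
		my_dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(my_dma_reserve);
	}
}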
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0402da7cce0..22936e1fcdf2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid);
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 26c9939857fa..8d9a1eb9fbba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,6 +104,7 @@ int min_free_kbytes = 1024;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
+static unsigned long __initdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid,
 			arch_zone_lowest_possible_pfn[zone_type],
 			arch_zone_highest_possible_pfn[zone_type]);
 }
+
+/* Return the zone index a PFN is in */
+int memmap_zone_idx(struct page *lmem_map)
+{
+	int i;
+	unsigned long phys_addr = virt_to_phys(lmem_map);
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		if (pfn < arch_zone_highest_possible_pfn[i])
+			break;
+
+	return i;
+}
 #else
 static inline unsigned long zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
+
+static inline int memmap_zone_idx(struct page *lmem_map)
+{
+	return MAX_NR_ZONES;
+}
 #endif
 
 static void __init calculate_node_totalpages(struct pglist_data *pgdat,
@@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
+		unsigned long size, realsize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
+		/*
+		 * Adjust realsize so that it accounts for how much memory
+		 * is used by this zone for memmap. This affects the watermark
+		 * and per-cpu initialisations
+		 */
+		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+		if (realsize >= memmap_pages) {
+			realsize -= memmap_pages;
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
+				zone_names[j], memmap_pages);
+		} else
+			printk(KERN_WARNING
+				"  %s zone: %lu pages exceeds realsize %lu\n",
+				zone_names[j], memmap_pages, realsize);
+
+		/* Account for reserved DMA pages */
+		if (j == ZONE_DMA && realsize > dma_reserve) {
+			realsize -= dma_reserve;
+			printk(KERN_DEBUG "  DMA zone: %lu pages reserved\n",
+					dma_reserve);
+		}
+
 		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
@@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 }
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
+/**
+ * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA
+ * @new_dma_reserve: The number of pages to mark reserved
+ *
+ * The per-cpu batchsize and zone watermarks are determined by present_pages.
+ * In the DMA zone, a significant percentage may be consumed by kernel image
+ * and other unfreeable allocations which can skew the watermarks badly. This
+ * function may optionally be used to account for unfreeable pages in
+ * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize.
+ */
+void __init set_dma_reserve(unsigned long new_dma_reserve)
+{
+	dma_reserve = new_dma_reserve;
+}
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
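
Why does trimming present_pages matter?  setup_per_zone_pages_min() splits
min_free_kbytes across the lowmem zones roughly in proportion to each zone's
present_pages, so a ZONE_DMA shrunk by dma_reserve receives a proportionally
smaller pages_min.  The standalone sketch below paraphrases that proportional
split with invented numbers; it is illustrative, not the kernel's exact code.

/*
 * Sketch: why shrinking a zone's present_pages lowers its min watermark.
 * The proportional split paraphrases setup_per_zone_pages_min() from
 * kernels of this era; all values are invented for illustration.
 */
#include <stdio.h>

int main(void)
{
        unsigned long pages_min = 256;          /* min_free_kbytes in pages */
        unsigned long dma_pages = 4096;         /* ZONE_DMA present_pages */
        unsigned long normal_pages = 126976;    /* ZONE_NORMAL present_pages */
        unsigned long dma_reserve = 1024;       /* unfreeable pages in ZONE_DMA */

        unsigned long lowmem = dma_pages + normal_pages;
        unsigned long dma_min = pages_min * dma_pages / lowmem;

        /* After set_dma_reserve(): the DMA zone looks smaller... */
        unsigned long adj_dma = dma_pages - dma_reserve;
        unsigned long adj_lowmem = adj_dma + normal_pages;
        unsigned long adj_dma_min = pages_min * adj_dma / adj_lowmem;

        /* ...so its share of min_free_kbytes, hence its watermark, drops */
        printf("DMA pages_min: %lu -> %lu\n", dma_min, adj_dma_min);
        return 0;
}

With these numbers the DMA zone's pages_min falls from 8 to 6: the reclaim
watermark no longer demands free pages that the kernel image and other
unfreeable allocations could never supply.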