aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Mel Gorman <mel@csn.ul.ie> 2006-09-27 04:49:43 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-27 11:26:11 -0400
commit c713216deebd95d2b0ab38fef8bb2361c0180c2d (patch)
tree a5a8c61be427e3591811ff712b9ec7ef2f1a1f20 /include/linux
parent 2bd0cfbde2c0a74e209acbf045f1298cc2f61e01 (diff)
[PATCH] Introduce mechanism for registering active regions of memory
At a basic level, architectures define structures to record where active ranges of page frames are located. Once located, the code to calculate zone sizes and holes in each architecture is very similar. Some of this zone and hole sizing code is difficult to read for no good reason. This set of patches eliminates the similar-looking architecture-specific code.

The patches introduce a mechanism where architectures register where the active ranges of page frames are with add_active_range(). When all areas have been discovered, free_area_init_nodes() is called to initialise the pgdat and zones. The zone sizes and holes are then calculated in an architecture independent manner.

Patch 1 introduces the mechanism for registering and initialising PFN ranges
Patch 2 changes ppc to use the mechanism - 139 arch-specific LOC removed
Patch 3 changes x86 to use the mechanism - 136 arch-specific LOC removed
Patch 4 changes x86_64 to use the mechanism - 74 arch-specific LOC removed
Patch 5 changes ia64 to use the mechanism - 52 arch-specific LOC removed
Patch 6 accounts for mem_map as a memory hole as the pages are not reclaimable. It adjusts the watermarks slightly

Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig, gensparse_defconfig and defconfig. Bob Picco has also tested and debugged on IA64. Jack Steiner successfully boot tested on a mammoth SGI IA64-based machine. These were on patches against 2.6.17-rc1 and release 3 of these patches but there have been no ia64-changes since release 3.

There are differences in the zone sizes for x86_64 as the arch-specific code for x86_64 accounts the kernel image and the starting mem_maps as memory holes but the architecture-independent code accounts the memory as present.

The big benefit of this set of patches is a sizable reduction of architecture-specific code, some of which is very hairy.
There should be a greater reduction when other architectures use the same mechanisms for zone and hole sizing but I lack the hardware to test on.

Additional credit:
Dave Hansen for the initial suggestion and comments on early patches
Andy Whitcroft for reviewing early versions and catching numerous errors
Tony Luck for testing and debugging on IA64
Bob Picco for fixing bugs related to pfn registration, reviewing a number of patch revisions, providing a number of suggestions on future direction and testing heavily
Jack Steiner and Robin Holt for testing on IA64 and clarifying issues related to memory holes
Yasunori for testing on IA64
Andi Kleen for reviewing and feeding back about x86_64
Christian Kujau for providing valuable information related to ACPI problems on x86_64 and testing potential fixes

This patch:

Define the structure to represent an active range of page frames within a node in an architecture independent manner. Architectures are expected to register active ranges of PFNs using add_active_range(nid, start_pfn, end_pfn) and call free_area_init_nodes() passing the PFNs of the end of each zone.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/mm.h 47
-rw-r--r-- include/linux/mmzone.h 10
2 files changed, 56 insertions, 1 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 856f0ee7e84a..c0402da7cce0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -937,6 +937,53 @@ extern void free_area_init(unsigned long * zones_size);
937extern void free_area_init_node(int nid, pg_data_t *pgdat, 937extern void free_area_init_node(int nid, pg_data_t *pgdat,
938 unsigned long * zones_size, unsigned long zone_start_pfn, 938 unsigned long * zones_size, unsigned long zone_start_pfn,
939 unsigned long *zholes_size); 939 unsigned long *zholes_size);
940#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
941/*
942 * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
943 * zones, allocate the backing mem_map and account for memory holes in a more
944 * architecture independent manner. This is a substitute for creating the
945 * zone_sizes[] and zholes_size[] arrays and passing them to
946 * free_area_init_node()
947 *
948 * An architecture is expected to register ranges of page frames backed by
949 * physical memory with add_active_range() before calling
950 * free_area_init_nodes() passing in the PFN each zone ends at. In basic
951 * usage, an architecture is expected to do something like
952 *
953 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
954 * max_highmem_pfn};
955 * for_each_valid_physical_page_range()
956 * add_active_range(node_id, start_pfn, end_pfn)
957 * free_area_init_nodes(max_zone_pfns);
958 *
959 * If the architecture guarantees that there are no holes in the ranges
960 * registered with add_active_range(), free_bootmem_with_active_regions()
961 * will call free_bootmem_node() for each registered physical page range.
962 * Similarly sparse_memory_present_with_active_regions() calls
963 * memory_present() for each range when SPARSEMEM is enabled.
964 *
965 * See mm/page_alloc.c for more information on each function exposed by
966 * CONFIG_ARCH_POPULATES_NODE_MAP
967 */
968extern void free_area_init_nodes(unsigned long *max_zone_pfn);
969extern void add_active_range(unsigned int nid, unsigned long start_pfn,
970 unsigned long end_pfn);
971extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
972 unsigned long new_end_pfn);
973extern void remove_all_active_ranges(void);
974extern unsigned long absent_pages_in_range(unsigned long start_pfn,
975 unsigned long end_pfn);
976extern void get_pfn_range_for_nid(unsigned int nid,
977 unsigned long *start_pfn, unsigned long *end_pfn);
978extern unsigned long find_min_pfn_with_active_regions(void);
979extern unsigned long find_max_pfn_with_active_regions(void);
980extern void free_bootmem_with_active_regions(int nid,
981 unsigned long max_low_pfn);
982extern void sparse_memory_present_with_active_regions(int nid);
983#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
984extern int early_pfn_to_nid(unsigned long pfn);
985#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
986#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
940extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); 987extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
941extern void setup_per_zone_pages_min(void); 988extern void setup_per_zone_pages_min(void);
942extern void mem_init(void); 989extern void mem_init(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3693f1a52788..7fa1cbe9fa7a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -305,6 +305,13 @@ struct zonelist {
305 struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited 305 struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
306}; 306};
307 307
308#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
309struct node_active_region {
310 unsigned long start_pfn;
311 unsigned long end_pfn;
312 int nid;
313};
314#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
308 315
309/* 316/*
310 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM 317 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone);
518 525
519#endif 526#endif
520 527
521#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 528#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
529 !defined(CONFIG_ARCH_POPULATES_NODE_MAP)
522#define early_pfn_to_nid(nid) (0UL) 530#define early_pfn_to_nid(nid) (0UL)
523#endif 531#endif
524 532