From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:49 -0700 Subject: mm: add a basic debugging framework for memory initialisation Boot initialisation is very complex, with significant numbers of architecture-specific routines, hooks and code ordering. While significant amounts of the initialisation is architecture-independent, it trusts the data received from the architecture layer. This is a mistake, and has resulted in a number of difficult-to-diagnose bugs. This patchset adds some validation and tracing to memory initialisation. It also introduces a few basic defensive measures. The validation code can be explicitly disabled for embedded systems. This patch: Add additional debugging and verification code for memory initialisation. Once enabled, the verification checks are always run and when required additional debugging information may be outputted via a mminit_loglevel= command-line parameter. The verification code is placed in a new file mm/mm_init.c. Ideally other mm initialisation code will be moved here over time. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 mm/mm_init.c (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c new file mode 100644 index 000000000000..c01d8dfec817 --- /dev/null +++ b/mm/mm_init.c @@ -0,0 +1,18 @@ +/* + * mm_init.c - Memory initialisation verification and debugging + * + * Copyright 2008 IBM Corporation, 2008 + * Author Mel Gorman + * + */ +#include +#include + +int __meminitdata mminit_loglevel; + +static __init int set_mminit_loglevel(char *str) +{ + get_option(&str, &mminit_loglevel); + return 0; +} +early_param("mminit_loglevel", set_mminit_loglevel); -- cgit v1.2.2 From 708614e6180f398cd307ea0048d48ba6fa274610 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:51 -0700 Subject: mm: verify the page links and memory model Print out information on how the page flags are being used if mminit_loglevel is MMINIT_VERIFY or higher and unconditionally performs sanity checks on the flags regardless of loglevel. When the page flags are updated with section, node and zone information, a check are made to ensure the values can be retrieved correctly. Finally we confirm that pfn_to_page and page_to_pfn are the correct inverse functions. [akpm@linux-foundation.org: fix printk warnings] Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index c01d8dfec817..e16990d629e6 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -7,9 +7,80 @@ */ #include #include +#include "internal.h" int __meminitdata mminit_loglevel; +void __init mminit_verify_pageflags_layout(void) +{ + int shift, width; + unsigned long or_mask, add_mask; + + shift = 8 * sizeof(unsigned long); + width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH; + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", + "Section %d Node %d Zone %d Flags %d\n", + SECTIONS_WIDTH, + NODES_WIDTH, + ZONES_WIDTH, + NR_PAGEFLAGS); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", + "Section %d Node %d Zone %d\n", +#ifdef SECTIONS_SHIFT + SECTIONS_SHIFT, +#else + 0, +#endif + NODES_SHIFT, + ZONES_SHIFT); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets", + "Section %lu Node %lu Zone %lu\n", + (unsigned long)SECTIONS_PGSHIFT, + (unsigned long)NODES_PGSHIFT, + (unsigned long)ZONES_PGSHIFT); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_zoneid", + "Zone ID: %lu -> %lu\n", + (unsigned long)ZONEID_PGOFF, + (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT)); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage", + "location: %d -> %d unused %d -> %d flags %d -> %d\n", + shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0); +#ifdef NODE_NOT_IN_PAGE_FLAGS + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags", + "Node not in page flags"); +#endif + + if (SECTIONS_WIDTH) { + shift -= SECTIONS_WIDTH; + BUG_ON(shift != SECTIONS_PGSHIFT); + } + if (NODES_WIDTH) { + shift -= NODES_WIDTH; + BUG_ON(shift != NODES_PGSHIFT); + } + if (ZONES_WIDTH) { + shift -= ZONES_WIDTH; + BUG_ON(shift != ZONES_PGSHIFT); + } + + /* Check for bitmask overlaps */ + or_mask = (ZONES_MASK << ZONES_PGSHIFT) | + (NODES_MASK << NODES_PGSHIFT) | + (SECTIONS_MASK << SECTIONS_PGSHIFT); + add_mask = (ZONES_MASK << ZONES_PGSHIFT) + + (NODES_MASK << NODES_PGSHIFT) + + (SECTIONS_MASK << SECTIONS_PGSHIFT); + BUG_ON(or_mask != add_mask); +} + +void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone, + unsigned long nid, unsigned long pfn) +{ + BUG_ON(page_to_nid(page) != nid); + BUG_ON(page_zonenum(page) != zone); + BUG_ON(page_to_pfn(page) != pfn); +} + static __init int set_mminit_loglevel(char *str) { get_option(&str, &mminit_loglevel); -- cgit v1.2.2 From 68ad8df42e12037c3894c9706ab428bf5cd6426b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:52 -0700 Subject: mm: print out the zonelists on request for manual verification This patch prints out the zonelists during boot for manual verification by the user if the mminit_loglevel is MMINIT_VERIFY or higher. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index e16990d629e6..ce445ca097e7 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -11,6 +11,51 @@ int __meminitdata mminit_loglevel; +/* The zonelists are simply reported, validation is manual. */ +void mminit_verify_zonelist(void) +{ + int nid; + + if (mminit_loglevel < MMINIT_VERIFY) + return; + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + struct zoneref *z; + struct zonelist *zonelist; + int i, listid, zoneid; + + BUG_ON(MAX_ZONELISTS > 2); + for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) { + + /* Identify the zone and nodelist */ + zoneid = i % MAX_NR_ZONES; + listid = i / MAX_NR_ZONES; + zonelist = &pgdat->node_zonelists[listid]; + zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + /* Print information about the zonelist */ + printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ", + listid > 0 ? "thisnode" : "general", nid, + zone->name); + + /* Iterate the zonelist */ + for_each_zone_zonelist(zone, z, zonelist, zoneid) { +#ifdef CONFIG_NUMA + printk(KERN_CONT "%d:%s ", + zone->node, zone->name); +#else + printk(KERN_CONT "0:%s ", zone->name); +#endif /* CONFIG_NUMA */ + } + printk(KERN_CONT "\n"); + } + } +} + void __init mminit_verify_pageflags_layout(void) { int shift, width; -- cgit v1.2.2 From 5e9426abe209cf134adbbd62c5e73ef534eb73e9 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:39 -0700 Subject: mm: remove mm_init compilation dependency on CONFIG_DEBUG_MEMORY_INIT Towards the end of putting all core mm initialization in mm_init.c, I plan on putting the creation of a mm kobject in a function in that file. However, the file is currently only compiled if CONFIG_DEBUG_MEMORY_INIT is set. Remove this dependency, but put the code under an #ifdef on the same config option. This should result in no functional changes. Signed-off-by: Nishanth Aravamudan Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index ce445ca097e7..eaf0d3b47099 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -9,6 +9,7 @@ #include #include "internal.h" +#ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; /* The zonelists are simply reported, validation is manual. */ @@ -132,3 +133,4 @@ static __init int set_mminit_loglevel(char *str) return 0; } early_param("mminit_loglevel", set_mminit_loglevel); +#endif /* CONFIG_DEBUG_MEMORY_INIT */ -- cgit v1.2.2 From ff7ea79cf7c3a481851bd4b2185fdeb6ce4afa29 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:39 -0700 Subject: mm: create /sys/kernel/mm Add a kobject to create /sys/kernel/mm when sysfs is mounted. The kobject will exist regardless. This will allow for the hugepage related sysfs directories to exist under the mm "subsystem" directory. Add an ABI file appropriately. [kosaki.motohiro@jp.fujitsu.com: fix build] Signed-off-by: Nishanth Aravamudan Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index eaf0d3b47099..c6af41ea9994 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -7,6 +7,8 @@ */ #include #include +#include +#include #include "internal.h" #ifdef CONFIG_DEBUG_MEMORY_INIT @@ -134,3 +136,17 @@ static __init int set_mminit_loglevel(char *str) } early_param("mminit_loglevel", set_mminit_loglevel); #endif /* CONFIG_DEBUG_MEMORY_INIT */ + +struct kobject *mm_kobj; +EXPORT_SYMBOL_GPL(mm_kobj); + +static int __init mm_sysfs_init(void) +{ + mm_kobj = kobject_create_and_add("mm", kernel_kobj); + if (!mm_kobj) + return -ENOMEM; + + return 0; +} + +__initcall(mm_sysfs_init); -- cgit v1.2.2 From 5c9ffc9c3d61dfcafd7cdb61c7b94f2d7ac408fb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 5 Aug 2008 13:01:01 -0700 Subject: mm_init.c: avoid ifdef-inside-macro-expansion gcc-3.2: mm/mm_init.c:77:1: directives may not be used inside a macro argument mm/mm_init.c:76:47: unterminated argument list invoking macro "mminit_dprintk" mm/mm_init.c: In function `mminit_verify_pageflags_layout': mm/mm_init.c:80: `mminit_dprintk' undeclared (first use in this function) mm/mm_init.c:80: (Each undeclared identifier is reported only once mm/mm_init.c:80: for each function it appears in.) mm/mm_init.c:80: syntax error before numeric constant Also fix a typo in a comment. Reported-by: Adrian Bunk Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index c6af41ea9994..936ef2efd892 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -14,6 +14,10 @@ #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; +#ifndef SECTIONS_SHIFT +#define SECTIONS_SHIFT 0 +#endif + /* The zonelists are simply reported, validation is manual. */ void mminit_verify_zonelist(void) { @@ -74,11 +78,7 @@ void __init mminit_verify_pageflags_layout(void) NR_PAGEFLAGS); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", "Section %d Node %d Zone %d\n", -#ifdef SECTIONS_SHIFT SECTIONS_SHIFT, -#else - 0, -#endif NODES_SHIFT, ZONES_SHIFT); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets", -- cgit v1.2.2