mm/page_owner: keep track of page owners

This is the page owner tracking code, which was introduced a long time ago. It has been resident in Andrew's tree, but nobody tried to upstream it, so it has remained as is. Our company actively uses this feature to debug memory leaks and to find memory hogs, so I decided to upstream it. This functionality helps us know who allocated a page. When allocating a page, we store some information about the allocation in extra memory. Later, if we need to know the status of all pages, we can retrieve and analyze this stored information. In the previous version of this feature, the extra memory was statically defined in struct page; in this version, it is allocated outside of struct page. This enables us to turn the feature on or off at boot time without considerable memory waste. Although we already have tracepoints for tracing page allocation/free, using them to analyze page owners is rather complex. We would need to enlarge the trace buffer to prevent entries from being overwritten before the userspace program is launched. The launched program would then have to continually dump the trace buffer for later analysis, which is more likely to change system behaviour than just keeping the information in memory, and is therefore bad for debugging. Moreover, we can use the page_owner feature for various other purposes. For example, it is used for the fragmentation statistics implemented in this patch. I also plan to implement a CMA failure debugging feature using this interface. I'd like to give credit to all the developers who contributed to this feature, but that is not easy because I don't know the exact history. Sorry about that. Below are the people who have a "Signed-off-by" in the patches in Andrew's tree. 
Contributors: Alexander Nyberg <alexn@dsv.su.se> Mel Gorman <mgorman@suse.de> Dave Hansen <dave@linux.vnet.ibm.com> Minchan Kim <minchan@kernel.org> Michal Nazarewicz <mina86@mina86.com> Andrew Morton <akpm@linux-foundation.org> Jungsoo Son <jungsoo.son@lge.com> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Dave Hansen <dave@sr71.net> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Jungsoo Son <jungsoo.son@lge.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Joonsoo Kim <iamjoonsoo.kim@lge.com> 2014-12-12 19:56:01 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-13 15:42:48 -0500
commit: 48c96a3685795e52903e60c7ee115e5e22e7d640 (patch)
tree: 49940b1971c9b487a52b2c91b2423eee9278ced5 /mm
parent: 9a92a6ce6f842713ccd0025c5228fe8bea61234c (diff)
5 files changed, 338 insertions, 1 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 580cd3f392af..4bf586e66378 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
+obj-$(CONFIG_PAGE_OWNER) += page_owner.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
 obj-$(CONFIG_ZPOOL)     += zpool.o
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 303d38516807..c13b6b29add2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -59,6 +59,7 @@
 #include <linux/page_ext.h>
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
+#include <linux/page_owner.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -813,6 +814,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
        if (bad)
                return false;
+        reset_page_owner(page, order);
        if (!PageHighMem(page)) {
                debug_check_no_locks_freed(page_address(page),
                                           PAGE_SIZE << order);
@@ -988,6 +991,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
        if (order && (gfp_flags & __GFP_COMP))
                prep_compound_page(page, order);
+        set_page_owner(page, order, gfp_flags);
        return 0;
 }
@@ -1560,8 +1565,11 @@ void split_page(struct page *page, unsigned int order)
                split_page(virt_to_page(page[0].shadow), order);
 #endif
-        for (i = 1; i < (1 << order); i++)
+        set_page_owner(page, 0, 0);
+        for (i = 1; i < (1 << order); i++) {
                set_page_refcounted(page + i);
+                set_page_owner(page + i, 0, 0);
+        }
 }
 EXPORT_SYMBOL_GPL(split_page);
@@ -1601,6 +1609,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                }
        }
+        set_page_owner(page, order, 0);
        return 1UL << order;
 }
diff --git a/mm/page_ext.c b/mm/page_ext.c
index c2cd7b15f0de..d86fd2f5353f 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -5,6 +5,7 @@
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
+#include <linux/page_owner.h>
 /*
 * struct page extension
@@ -55,6 +56,9 @@ static struct page_ext_operations *page_ext_ops[] = {
 #ifdef CONFIG_PAGE_POISONING
        &page_poisoning_ops,
 #endif
+#ifdef CONFIG_PAGE_OWNER
+        &page_owner_ops,
+#endif
 };
 static unsigned long total_usage;
diff --git a/mm/page_owner.c b/mm/page_owner.c
new file mode 100644
index 000000000000..85eec7ea6735
--- /dev/null
+++ b/mm/page_owner.c
@@ -0,0 +1,222 @@
+#include <linux/debugfs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/bootmem.h>
+#include <linux/stacktrace.h>
+#include <linux/page_owner.h>
+#include "internal.h"
+static bool page_owner_disabled = true;
+bool page_owner_inited __read_mostly;
+static int early_page_owner_param(char *buf)
+{
+        if (!buf)
+                return -EINVAL;
+        if (strcmp(buf, "on") == 0)
+                page_owner_disabled = false;
+        return 0;
+}
+early_param("page_owner", early_page_owner_param);
+static bool need_page_owner(void)
+{
+        if (page_owner_disabled)
+                return false;
+        return true;
+}
+static void init_page_owner(void)
+{
+        if (page_owner_disabled)
+                return;
+        page_owner_inited = true;
+}
+struct page_ext_operations page_owner_ops = {
+        .need = need_page_owner,
+        .init = init_page_owner,
+};
+void __reset_page_owner(struct page *page, unsigned int order)
+{
+        int i;
+        struct page_ext *page_ext;
+        for (i = 0; i < (1 << order); i++) {
+                page_ext = lookup_page_ext(page + i);
+                __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
+        }
+}
+void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
+{
+        struct page_ext *page_ext;
+        struct stack_trace *trace;
+        page_ext = lookup_page_ext(page);
+        trace = &page_ext->trace;
+        trace->nr_entries = 0;
+        trace->max_entries = ARRAY_SIZE(page_ext->trace_entries);
+        trace->entries = &page_ext->trace_entries[0];
+        trace->skip = 3;
+        save_stack_trace(&page_ext->trace);
+        page_ext->order = order;
+        page_ext->gfp_mask = gfp_mask;
+        __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+}
+static ssize_t
+print_page_owner(char __user *buf, size_t count, unsigned long pfn,
+                struct page *page, struct page_ext *page_ext)
+{
+        int ret;
+        int pageblock_mt, page_mt;
+        char *kbuf;
+        kbuf = kmalloc(count, GFP_KERNEL);
+        if (!kbuf)
+                return -ENOMEM;
+        ret = snprintf(kbuf, count,
+                        "Page allocated via order %u, mask 0x%x\n",
+                        page_ext->order, page_ext->gfp_mask);
+        if (ret >= count)
+                goto err;
+        /* Print information relevant to grouping pages by mobility */
+        pageblock_mt = get_pfnblock_migratetype(page, pfn);
+        page_mt  = gfpflags_to_migratetype(page_ext->gfp_mask);
+        ret += snprintf(kbuf + ret, count - ret,
+                        "PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n",
+                        pfn,
+                        pfn >> pageblock_order,
+                        pageblock_mt,
+                        pageblock_mt != page_mt ? "Fallback" : "        ",
+                        PageLocked(page)        ? "K" : " ",
+                        PageError(page)         ? "E" : " ",
+                        PageReferenced(page)    ? "R" : " ",
+                        PageUptodate(page)      ? "U" : " ",
+                        PageDirty(page)         ? "D" : " ",
+                        PageLRU(page)           ? "L" : " ",
+                        PageActive(page)        ? "A" : " ",
+                        PageSlab(page)          ? "S" : " ",
+                        PageWriteback(page)     ? "W" : " ",
+                        PageCompound(page)      ? "C" : " ",
+                        PageSwapCache(page)     ? "B" : " ",
+                        PageMappedToDisk(page)  ? "M" : " ");
+        if (ret >= count)
+                goto err;
+        ret += snprint_stack_trace(kbuf + ret, count - ret,
+                                        &page_ext->trace, 0);
+        if (ret >= count)
+                goto err;
+        ret += snprintf(kbuf + ret, count - ret, "\n");
+        if (ret >= count)
+                goto err;
+        if (copy_to_user(buf, kbuf, ret))
+                ret = -EFAULT;
+        kfree(kbuf);
+        return ret;
+err:
+        kfree(kbuf);
+        return -ENOMEM;
+}
+static ssize_t
+read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+        unsigned long pfn;
+        struct page *page;
+        struct page_ext *page_ext;
+        if (!page_owner_inited)
+                return -EINVAL;
+        page = NULL;
+        pfn = min_low_pfn + *ppos;
+        /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
+        while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
+                pfn++;
+        drain_all_pages(NULL);
+        /* Find an allocated page */
+        for (; pfn < max_pfn; pfn++) {
+                /*
+                 * If the new page is in a new MAX_ORDER_NR_PAGES area,
+                 * validate the area as existing, skip it if not
+                 */
+                if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
+                        pfn += MAX_ORDER_NR_PAGES - 1;
+                        continue;
+                }
+                /* Check for holes within a MAX_ORDER area */
+                if (!pfn_valid_within(pfn))
+                        continue;
+                page = pfn_to_page(pfn);
+                if (PageBuddy(page)) {
+                        unsigned long freepage_order = page_order_unsafe(page);
+                        if (freepage_order < MAX_ORDER)
+                                pfn += (1UL << freepage_order) - 1;
+                        continue;
+                }
+                page_ext = lookup_page_ext(page);
+                /*
+                 * Pages allocated before initialization of page_owner are
+                 * non-buddy and have no page_owner info.
+                 */
+                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                        continue;
+                /* Record the next PFN to read in the file offset */
+                *ppos = (pfn - min_low_pfn) + 1;
+                return print_page_owner(buf, count, pfn, page, page_ext);
+        }
+        return 0;
+}
+static const struct file_operations proc_page_owner_operations = {
+        .read           = read_page_owner,
+};
+static int __init pageowner_init(void)
+{
+        struct dentry *dentry;
+        if (!page_owner_inited) {
+                pr_info("page_owner is disabled\n");
+                return 0;
+        }
+        dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
+                        NULL, &proc_page_owner_operations);
+        if (IS_ERR(dentry))
+                return PTR_ERR(dentry);
+        return 0;
+}
+module_init(pageowner_init)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1b12d390dc68..b090e9e3d626 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -22,6 +22,8 @@
 #include <linux/writeback.h>
 #include <linux/compaction.h>
 #include <linux/mm_inline.h>
+#include <linux/page_ext.h>
+#include <linux/page_owner.h>
 #include "internal.h"
@@ -1017,6 +1019,104 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
        return 0;
 }
+#ifdef CONFIG_PAGE_OWNER
+static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
+                                                        pg_data_t *pgdat,
+                                                        struct zone *zone)
+{
+        struct page *page;
+        struct page_ext *page_ext;
+        unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
+        unsigned long end_pfn = pfn + zone->spanned_pages;
+        unsigned long count[MIGRATE_TYPES] = { 0, };
+        int pageblock_mt, page_mt;
+        int i;
+        /* Scan block by block. First and last block may be incomplete */
+        pfn = zone->zone_start_pfn;
+        /*
+         * Walk the zone in pageblock_nr_pages steps. If a page block spans
+         * a zone boundary, it will be double counted between zones. This does
+         * not matter as the mixed block count will still be correct
+         */
+        for (; pfn < end_pfn; ) {
+                if (!pfn_valid(pfn)) {
+                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+                        continue;
+                }
+                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                block_end_pfn = min(block_end_pfn, end_pfn);
+                page = pfn_to_page(pfn);
+                pageblock_mt = get_pfnblock_migratetype(page, pfn);
+                for (; pfn < block_end_pfn; pfn++) {
+                        if (!pfn_valid_within(pfn))
+                                continue;
+                        page = pfn_to_page(pfn);
+                        if (PageBuddy(page)) {
+                                pfn += (1UL << page_order(page)) - 1;
+                                continue;
+                        }
+                        if (PageReserved(page))
+                                continue;
+                        page_ext = lookup_page_ext(page);
+                        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                                continue;
+                        page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
+                        if (pageblock_mt != page_mt) {
+                                if (is_migrate_cma(pageblock_mt))
+                                        count[MIGRATE_MOVABLE]++;
+                                else
+                                        count[pageblock_mt]++;
+                                pfn = block_end_pfn;
+                                break;
+                        }
+                        pfn += (1UL << page_ext->order) - 1;
+                }
+        }
+        /* Print counts */
+        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+        for (i = 0; i < MIGRATE_TYPES; i++)
+                seq_printf(m, "%12lu ", count[i]);
+        seq_putc(m, '\n');
+}
+#endif /* CONFIG_PAGE_OWNER */
+/*
+ * Print out the number of pageblocks for each migratetype that contain pages
+ * of other types. This gives an indication of how well fallbacks are being
+ * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
+ * to determine what is going on
+ */
+static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
+{
+#ifdef CONFIG_PAGE_OWNER
+        int mtype;
+        if (!page_owner_inited)
+                return;
+        drain_all_pages(NULL);
+        seq_printf(m, "\n%-23s", "Number of mixed blocks ");
+        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
+                seq_printf(m, "%12s ", migratetype_names[mtype]);
+        seq_putc(m, '\n');
+        walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
+#endif /* CONFIG_PAGE_OWNER */
+}
 /*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
@@ -1034,6 +1134,7 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
        seq_putc(m, '\n');
        pagetypeinfo_showfree(m, pgdat);
        pagetypeinfo_showblockcount(m, pgdat);
+        pagetypeinfo_showmixedcount(m, pgdat);
        return 0;
 }
author	Joonsoo Kim <iamjoonsoo.kim@lge.com>	2014-12-12 19:56:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-13 15:42:48 -0500
commit	48c96a3685795e52903e60c7ee115e5e22e7d640 (patch)
tree	49940b1971c9b487a52b2c91b2423eee9278ced5 /mm
parent	9a92a6ce6f842713ccd0025c5228fe8bea61234c (diff)

diff --git a/mm/Makefile b/mm/Makefile index 580cd3f392af..4bf586e66378 100644 --- a/mm/Makefile +++ b/mm/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
63	obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o	63	obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
64	obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o	64	obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
65	obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o	65	obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
		66	obj-$(CONFIG_PAGE_OWNER) += page_owner.o
66	obj-$(CONFIG_CLEANCACHE) += cleancache.o	67	obj-$(CONFIG_CLEANCACHE) += cleancache.o
67	obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o	68	obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
68	obj-$(CONFIG_ZPOOL) += zpool.o	69	obj-$(CONFIG_ZPOOL) += zpool.o


diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 303d38516807..c13b6b29add2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
@@ -59,6 +59,7 @@
59	#include <linux/page_ext.h>	59	#include <linux/page_ext.h>
60	#include <linux/hugetlb.h>	60	#include <linux/hugetlb.h>
61	#include <linux/sched/rt.h>	61	#include <linux/sched/rt.h>
		62	#include <linux/page_owner.h>
62		63
63	#include <asm/sections.h>	64	#include <asm/sections.h>
64	#include <asm/tlbflush.h>	65	#include <asm/tlbflush.h>
@@ -813,6 +814,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
813	if (bad)	814	if (bad)
814	return false;	815	return false;
815		816
		817	reset_page_owner(page, order);
		818
816	if (!PageHighMem(page)) {	819	if (!PageHighMem(page)) {
817	debug_check_no_locks_freed(page_address(page),	820	debug_check_no_locks_freed(page_address(page),
818	PAGE_SIZE << order);	821	PAGE_SIZE << order);
@@ -988,6 +991,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
988	if (order && (gfp_flags & __GFP_COMP))	991	if (order && (gfp_flags & __GFP_COMP))
989	prep_compound_page(page, order);	992	prep_compound_page(page, order);
990		993
		994	set_page_owner(page, order, gfp_flags);
		995
991	return 0;	996	return 0;
992	}	997	}
993		998
@@ -1560,8 +1565,11 @@ void split_page(struct page *page, unsigned int order)
1560	split_page(virt_to_page(page[0].shadow), order);	1565	split_page(virt_to_page(page[0].shadow), order);
1561	#endif	1566	#endif
1562		1567
1563	for (i = 1; i < (1 << order); i++)	1568	set_page_owner(page, 0, 0);
		1569	for (i = 1; i < (1 << order); i++) {
1564	set_page_refcounted(page + i);	1570	set_page_refcounted(page + i);
		1571	set_page_owner(page + i, 0, 0);
		1572	}
1565	}	1573	}
1566	EXPORT_SYMBOL_GPL(split_page);	1574	EXPORT_SYMBOL_GPL(split_page);
1567		1575
@@ -1601,6 +1609,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
1601	}	1609	}
1602	}	1610	}
1603		1611
		1612	set_page_owner(page, order, 0);
1604	return 1UL << order;	1613	return 1UL << order;
1605	}	1614	}
1606		1615


diff --git a/mm/page_ext.c b/mm/page_ext.c index c2cd7b15f0de..d86fd2f5353f 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c
@@ -5,6 +5,7 @@
5	#include <linux/memory.h>	5	#include <linux/memory.h>
6	#include <linux/vmalloc.h>	6	#include <linux/vmalloc.h>
7	#include <linux/kmemleak.h>	7	#include <linux/kmemleak.h>
		8	#include <linux/page_owner.h>
8		9
9	/*	10	/*
10	* struct page extension	11	* struct page extension
@@ -55,6 +56,9 @@ static struct page_ext_operations *page_ext_ops[] = {
55	#ifdef CONFIG_PAGE_POISONING	56	#ifdef CONFIG_PAGE_POISONING
56	&page_poisoning_ops,	57	&page_poisoning_ops,
57	#endif	58	#endif
		59	#ifdef CONFIG_PAGE_OWNER
		60	&page_owner_ops,
		61	#endif
58	};	62	};
59		63
60	static unsigned long total_usage;	64	static unsigned long total_usage;


diff --git a/mm/page_owner.c b/mm/page_owner.c new file mode 100644 index 000000000000..85eec7ea6735 --- /dev/null +++ b/mm/page_owner.c
@@ -0,0 +1,222 @@
		1	#include <linux/debugfs.h>
		2	#include <linux/mm.h>
		3	#include <linux/slab.h>
		4	#include <linux/uaccess.h>
		5	#include <linux/bootmem.h>
		6	#include <linux/stacktrace.h>
		7	#include <linux/page_owner.h>
		8	#include "internal.h"
		9
		10	static bool page_owner_disabled = true;
		11	bool page_owner_inited __read_mostly;
		12
		13	static int early_page_owner_param(char *buf)
		14	{
		15	if (!buf)
		16	return -EINVAL;
		17
		18	if (strcmp(buf, "on") == 0)
		19	page_owner_disabled = false;
		20
		21	return 0;
		22	}
		23	early_param("page_owner", early_page_owner_param);
		24
		25	static bool need_page_owner(void)
		26	{
		27	if (page_owner_disabled)
		28	return false;
		29
		30	return true;
		31	}
		32
		33	static void init_page_owner(void)
		34	{
		35	if (page_owner_disabled)
		36	return;
		37
		38	page_owner_inited = true;
		39	}
		40
		41	struct page_ext_operations page_owner_ops = {
		42	.need = need_page_owner,
		43	.init = init_page_owner,
		44	};
		45
		46	void __reset_page_owner(struct page *page, unsigned int order)
		47	{
		48	int i;
		49	struct page_ext *page_ext;
		50
		51	for (i = 0; i < (1 << order); i++) {
		52	page_ext = lookup_page_ext(page + i);
		53	__clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
		54	}
		55	}
		56
		57	void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
		58	{
		59	struct page_ext *page_ext;
		60	struct stack_trace *trace;
		61
		62	page_ext = lookup_page_ext(page);
		63
		64	trace = &page_ext->trace;
		65	trace->nr_entries = 0;
		66	trace->max_entries = ARRAY_SIZE(page_ext->trace_entries);
		67	trace->entries = &page_ext->trace_entries[0];
		68	trace->skip = 3;
		69	save_stack_trace(&page_ext->trace);
		70
		71	page_ext->order = order;
		72	page_ext->gfp_mask = gfp_mask;
		73
		74	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		75	}
		76
		77	static ssize_t
		78	print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		79	struct page *page, struct page_ext *page_ext)
		80	{
		81	int ret;
		82	int pageblock_mt, page_mt;
		83	char *kbuf;
		84
		85	kbuf = kmalloc(count, GFP_KERNEL);
		86	if (!kbuf)
		87	return -ENOMEM;
		88
		89	ret = snprintf(kbuf, count,
		90	"Page allocated via order %u, mask 0x%x\n",
		91	page_ext->order, page_ext->gfp_mask);
		92
		93	if (ret >= count)
		94	goto err;
		95
		96	/* Print information relevant to grouping pages by mobility */
		97	pageblock_mt = get_pfnblock_migratetype(page, pfn);
		98	page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
		99	ret += snprintf(kbuf + ret, count - ret,
		100	"PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n",
		101	pfn,
		102	pfn >> pageblock_order,
		103	pageblock_mt,
		104	pageblock_mt != page_mt ? "Fallback" : "        ",
		105	PageLocked(page) ? "K" : " ",
		106	PageError(page) ? "E" : " ",
		107	PageReferenced(page) ? "R" : " ",
		108	PageUptodate(page) ? "U" : " ",
		109	PageDirty(page) ? "D" : " ",
		110	PageLRU(page) ? "L" : " ",
		111	PageActive(page) ? "A" : " ",
		112	PageSlab(page) ? "S" : " ",
		113	PageWriteback(page) ? "W" : " ",
		114	PageCompound(page) ? "C" : " ",
		115	PageSwapCache(page) ? "B" : " ",
		116	PageMappedToDisk(page) ? "M" : " ");
		117
		118	if (ret >= count)
		119	goto err;
		120
		121	ret += snprint_stack_trace(kbuf + ret, count - ret,
		122	&page_ext->trace, 0);
		123	if (ret >= count)
		124	goto err;
		125
		126	ret += snprintf(kbuf + ret, count - ret, "\n");
		127	if (ret >= count)
		128	goto err;
		129
		130	if (copy_to_user(buf, kbuf, ret))
		131	ret = -EFAULT;
		132
		133	kfree(kbuf);
		134	return ret;
		135
		136	err:
		137	kfree(kbuf);
		138	return -ENOMEM;
		139	}
		140
		141	static ssize_t
		142	read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
		143	{
		144	unsigned long pfn;
		145	struct page *page;
		146	struct page_ext *page_ext;
		147
		148	if (!page_owner_inited)
		149	return -EINVAL;
		150
		151	page = NULL;
		152	pfn = min_low_pfn + *ppos;
		153
		154	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
		155	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		156	pfn++;
		157
		158	drain_all_pages(NULL);
		159
		160	/* Find an allocated page */
		161	for (; pfn < max_pfn; pfn++) {
		162	/*
		163	* If the new page is in a new MAX_ORDER_NR_PAGES area,
		164	* validate the area as existing, skip it if not
		165	*/
		166	if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
		167	pfn += MAX_ORDER_NR_PAGES - 1;
		168	continue;
		169	}
		170
		171	/* Check for holes within a MAX_ORDER area */
		172	if (!pfn_valid_within(pfn))
		173	continue;
		174
		175	page = pfn_to_page(pfn);
		176	if (PageBuddy(page)) {
		177	unsigned long freepage_order = page_order_unsafe(page);
		178
		179	if (freepage_order < MAX_ORDER)
		180	pfn += (1UL << freepage_order) - 1;
		181	continue;
		182	}
		183
		184	page_ext = lookup_page_ext(page);
		185
		186	/*
		187	* Pages allocated before initialization of page_owner are
		188	* non-buddy and have no page_owner info.
		189	*/
		190	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
		191	continue;
		192
		193	/* Record the next PFN to read in the file offset */
		194	*ppos = (pfn - min_low_pfn) + 1;
		195
		196	return print_page_owner(buf, count, pfn, page, page_ext);
		197	}
		198
		199	return 0;
		200	}
		201
		202	static const struct file_operations proc_page_owner_operations = {
		203	.read = read_page_owner,
		204	};
		205
		206	static int __init pageowner_init(void)
		207	{
		208	struct dentry *dentry;
		209
		210	if (!page_owner_inited) {
		211	pr_info("page_owner is disabled\n");
		212	return 0;
		213	}
		214
		215	dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
		216	NULL, &proc_page_owner_operations);
		217	if (IS_ERR(dentry))
		218	return PTR_ERR(dentry);
		219
		220	return 0;
		221	}
		222	module_init(pageowner_init)


diff --git a/mm/vmstat.c b/mm/vmstat.c index 1b12d390dc68..b090e9e3d626 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c
@@ -22,6 +22,8 @@
22	#include <linux/writeback.h>	22	#include <linux/writeback.h>
23	#include <linux/compaction.h>	23	#include <linux/compaction.h>
24	#include <linux/mm_inline.h>	24	#include <linux/mm_inline.h>
		25	#include <linux/page_ext.h>
		26	#include <linux/page_owner.h>
25		27
26	#include "internal.h"	28	#include "internal.h"
27		29
@@ -1017,6 +1019,104 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1017	return 0;	1019	return 0;
1018	}	1020	}
1019		1021
		1022	#ifdef CONFIG_PAGE_OWNER
		1023	static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
		1024	pg_data_t *pgdat,
		1025	struct zone *zone)
		1026	{
		1027	struct page *page;
		1028	struct page_ext *page_ext;
		1029	unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
		1030	unsigned long end_pfn = pfn + zone->spanned_pages;
		1031	unsigned long count[MIGRATE_TYPES] = { 0, };
		1032	int pageblock_mt, page_mt;
		1033	int i;
		1034
		1035	/* Scan block by block. First and last block may be incomplete */
		1036	pfn = zone->zone_start_pfn;
		1037
		1038	/*
		1039	* Walk the zone in pageblock_nr_pages steps. If a page block spans
		1040	* a zone boundary, it will be double counted between zones. This does
		1041	* not matter as the mixed block count will still be correct
		1042	*/
		1043	for (; pfn < end_pfn; ) {
		1044	if (!pfn_valid(pfn)) {
		1045	pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
		1046	continue;
		1047	}
		1048
		1049	block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		1050	block_end_pfn = min(block_end_pfn, end_pfn);
		1051
		1052	page = pfn_to_page(pfn);
		1053	pageblock_mt = get_pfnblock_migratetype(page, pfn);
		1054
		1055	for (; pfn < block_end_pfn; pfn++) {
		1056	if (!pfn_valid_within(pfn))
		1057	continue;
		1058
		1059	page = pfn_to_page(pfn);
		1060	if (PageBuddy(page)) {
		1061	pfn += (1UL << page_order(page)) - 1;
		1062	continue;
		1063	}
		1064
		1065	if (PageReserved(page))
		1066	continue;
		1067
		1068	page_ext = lookup_page_ext(page);
		1069
		1070	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
		1071	continue;
		1072
		1073	page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
		1074	if (pageblock_mt != page_mt) {
		1075	if (is_migrate_cma(pageblock_mt))
		1076	count[MIGRATE_MOVABLE]++;
		1077	else
		1078	count[pageblock_mt]++;
		1079
		1080	pfn = block_end_pfn;
		1081	break;
		1082	}
		1083	pfn += (1UL << page_ext->order) - 1;
		1084	}
		1085	}
		1086
		1087	/* Print counts */
		1088	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
		1089	for (i = 0; i < MIGRATE_TYPES; i++)
		1090	seq_printf(m, "%12lu ", count[i]);
		1091	seq_putc(m, '\n');
		1092	}
		1093	#endif /* CONFIG_PAGE_OWNER */
		1094
		1095	/*
		1096	* Print out the number of pageblocks for each migratetype that contain pages
		1097	* of other types. This gives an indication of how well fallbacks are being
		1098	* contained by rmqueue_fallback(). It requires information from PAGE_OWNER
		1099	* to determine what is going on
		1100	*/
		1101	static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
		1102	{
		1103	#ifdef CONFIG_PAGE_OWNER
		1104	int mtype;
		1105
		1106	if (!page_owner_inited)
		1107	return;
		1108
		1109	drain_all_pages(NULL);
		1110
		1111	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
		1112	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		1113	seq_printf(m, "%12s ", migratetype_names[mtype]);
		1114	seq_putc(m, '\n');
		1115
		1116	walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
		1117	#endif /* CONFIG_PAGE_OWNER */
		1118	}
		1119
1020	/*	1120	/*
1021	* This prints out statistics in relation to grouping pages by mobility.	1121	* This prints out statistics in relation to grouping pages by mobility.
1022	* It is expensive to collect so do not constantly read the file.	1122	* It is expensive to collect so do not constantly read the file.
@@ -1034,6 +1134,7 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
1034	seq_putc(m, '\n');	1134	seq_putc(m, '\n');
1035	pagetypeinfo_showfree(m, pgdat);	1135	pagetypeinfo_showfree(m, pgdat);
1036	pagetypeinfo_showblockcount(m, pgdat);	1136	pagetypeinfo_showblockcount(m, pgdat);
		1137	pagetypeinfo_showmixedcount(m, pgdat);
1037		1138
1038	return 0;	1139	return 0;
1039	}	1140	}