aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorJoonsoo Kim <iamjoonsoo.kim@lge.com>2014-12-12 19:56:01 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-13 15:42:48 -0500
commit48c96a3685795e52903e60c7ee115e5e22e7d640 (patch)
tree49940b1971c9b487a52b2c91b2423eee9278ced5 /mm
parent9a92a6ce6f842713ccd0025c5228fe8bea61234c (diff)
mm/page_owner: keep track of page owners
This is the page owner tracking code, which was introduced a long time ago. It has been resident in Andrew's tree, but nobody tried to upstream it, so it has remained as is. Our company actively uses this feature to debug memory leaks or to find a memory hogger, so I decided to upstream it. This functionality helps us to know who allocates a page. When allocating a page, we store some information about the allocation in extra memory. Later, if we need to know the status of all pages, we can get and analyze it from this stored information. In the previous version of this feature, the extra memory was statically defined in struct page, but in this version the extra memory is allocated outside of struct page. This enables us to turn the feature on/off at boot time without considerable memory waste. Although we already have tracepoints for tracing page allocation/free, using them to analyze page owners is rather complex. We would need to enlarge the trace buffer to prevent it from being overwritten before the userspace program is launched. Also, the launched program would have to continually dump out the trace buffer for later analysis, which is more likely to change system behaviour than just keeping the information in memory, so it is bad for debugging. Moreover, we can use the page_owner feature further for various purposes. For example, we can use it for the fragmentation statistics implemented in this patch. I also plan to implement some CMA failure debugging features using this interface. I'd like to give credit to all the developers who contributed to this feature, but it's not easy because I don't know the exact history. Sorry about that. Below are the people who have "Signed-off-by" in the patches in Andrew's tree. 
Contributor: Alexander Nyberg <alexn@dsv.su.se> Mel Gorman <mgorman@suse.de> Dave Hansen <dave@linux.vnet.ibm.com> Minchan Kim <minchan@kernel.org> Michal Nazarewicz <mina86@mina86.com> Andrew Morton <akpm@linux-foundation.org> Jungsoo Son <jungsoo.son@lge.com> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Dave Hansen <dave@sr71.net> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Jungsoo Son <jungsoo.son@lge.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile1
-rw-r--r--mm/page_alloc.c11
-rw-r--r--mm/page_ext.c4
-rw-r--r--mm/page_owner.c222
-rw-r--r--mm/vmstat.c101
5 files changed, 338 insertions, 1 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 580cd3f392af..4bf586e66378 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
63obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o 63obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
64obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o 64obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
65obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 65obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
66obj-$(CONFIG_PAGE_OWNER) += page_owner.o
66obj-$(CONFIG_CLEANCACHE) += cleancache.o 67obj-$(CONFIG_CLEANCACHE) += cleancache.o
67obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 68obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
68obj-$(CONFIG_ZPOOL) += zpool.o 69obj-$(CONFIG_ZPOOL) += zpool.o
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 303d38516807..c13b6b29add2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -59,6 +59,7 @@
59#include <linux/page_ext.h> 59#include <linux/page_ext.h>
60#include <linux/hugetlb.h> 60#include <linux/hugetlb.h>
61#include <linux/sched/rt.h> 61#include <linux/sched/rt.h>
62#include <linux/page_owner.h>
62 63
63#include <asm/sections.h> 64#include <asm/sections.h>
64#include <asm/tlbflush.h> 65#include <asm/tlbflush.h>
@@ -813,6 +814,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
813 if (bad) 814 if (bad)
814 return false; 815 return false;
815 816
817 reset_page_owner(page, order);
818
816 if (!PageHighMem(page)) { 819 if (!PageHighMem(page)) {
817 debug_check_no_locks_freed(page_address(page), 820 debug_check_no_locks_freed(page_address(page),
818 PAGE_SIZE << order); 821 PAGE_SIZE << order);
@@ -988,6 +991,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
988 if (order && (gfp_flags & __GFP_COMP)) 991 if (order && (gfp_flags & __GFP_COMP))
989 prep_compound_page(page, order); 992 prep_compound_page(page, order);
990 993
994 set_page_owner(page, order, gfp_flags);
995
991 return 0; 996 return 0;
992} 997}
993 998
@@ -1560,8 +1565,11 @@ void split_page(struct page *page, unsigned int order)
1560 split_page(virt_to_page(page[0].shadow), order); 1565 split_page(virt_to_page(page[0].shadow), order);
1561#endif 1566#endif
1562 1567
1563 for (i = 1; i < (1 << order); i++) 1568 set_page_owner(page, 0, 0);
1569 for (i = 1; i < (1 << order); i++) {
1564 set_page_refcounted(page + i); 1570 set_page_refcounted(page + i);
1571 set_page_owner(page + i, 0, 0);
1572 }
1565} 1573}
1566EXPORT_SYMBOL_GPL(split_page); 1574EXPORT_SYMBOL_GPL(split_page);
1567 1575
@@ -1601,6 +1609,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
1601 } 1609 }
1602 } 1610 }
1603 1611
1612 set_page_owner(page, order, 0);
1604 return 1UL << order; 1613 return 1UL << order;
1605} 1614}
1606 1615
diff --git a/mm/page_ext.c b/mm/page_ext.c
index c2cd7b15f0de..d86fd2f5353f 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -5,6 +5,7 @@
5#include <linux/memory.h> 5#include <linux/memory.h>
6#include <linux/vmalloc.h> 6#include <linux/vmalloc.h>
7#include <linux/kmemleak.h> 7#include <linux/kmemleak.h>
8#include <linux/page_owner.h>
8 9
9/* 10/*
10 * struct page extension 11 * struct page extension
@@ -55,6 +56,9 @@ static struct page_ext_operations *page_ext_ops[] = {
55#ifdef CONFIG_PAGE_POISONING 56#ifdef CONFIG_PAGE_POISONING
56 &page_poisoning_ops, 57 &page_poisoning_ops,
57#endif 58#endif
59#ifdef CONFIG_PAGE_OWNER
60 &page_owner_ops,
61#endif
58}; 62};
59 63
60static unsigned long total_usage; 64static unsigned long total_usage;
diff --git a/mm/page_owner.c b/mm/page_owner.c
new file mode 100644
index 000000000000..85eec7ea6735
--- /dev/null
+++ b/mm/page_owner.c
@@ -0,0 +1,222 @@
1#include <linux/debugfs.h>
2#include <linux/mm.h>
3#include <linux/slab.h>
4#include <linux/uaccess.h>
5#include <linux/bootmem.h>
6#include <linux/stacktrace.h>
7#include <linux/page_owner.h>
8#include "internal.h"
9
10static bool page_owner_disabled = true;
11bool page_owner_inited __read_mostly;
12
13static int early_page_owner_param(char *buf)
14{
15 if (!buf)
16 return -EINVAL;
17
18 if (strcmp(buf, "on") == 0)
19 page_owner_disabled = false;
20
21 return 0;
22}
23early_param("page_owner", early_page_owner_param);
24
25static bool need_page_owner(void)
26{
27 if (page_owner_disabled)
28 return false;
29
30 return true;
31}
32
33static void init_page_owner(void)
34{
35 if (page_owner_disabled)
36 return;
37
38 page_owner_inited = true;
39}
40
/*
 * page_ext operations for page owner tracking: .need points at
 * need_page_owner() and .init at init_page_owner().
 */
41struct page_ext_operations page_owner_ops = {
42 .need = need_page_owner,
43 .init = init_page_owner,
44};
45
46void __reset_page_owner(struct page *page, unsigned int order)
47{
48 int i;
49 struct page_ext *page_ext;
50
51 for (i = 0; i < (1 << order); i++) {
52 page_ext = lookup_page_ext(page + i);
53 __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
54 }
55}
56
57void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
58{
59 struct page_ext *page_ext;
60 struct stack_trace *trace;
61
62 page_ext = lookup_page_ext(page);
63
64 trace = &page_ext->trace;
65 trace->nr_entries = 0;
66 trace->max_entries = ARRAY_SIZE(page_ext->trace_entries);
67 trace->entries = &page_ext->trace_entries[0];
68 trace->skip = 3;
69 save_stack_trace(&page_ext->trace);
70
71 page_ext->order = order;
72 page_ext->gfp_mask = gfp_mask;
73
74 __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
75}
76
77static ssize_t
78print_page_owner(char __user *buf, size_t count, unsigned long pfn,
79 struct page *page, struct page_ext *page_ext)
80{
81 int ret;
82 int pageblock_mt, page_mt;
83 char *kbuf;
84
85 kbuf = kmalloc(count, GFP_KERNEL);
86 if (!kbuf)
87 return -ENOMEM;
88
89 ret = snprintf(kbuf, count,
90 "Page allocated via order %u, mask 0x%x\n",
91 page_ext->order, page_ext->gfp_mask);
92
93 if (ret >= count)
94 goto err;
95
96 /* Print information relevant to grouping pages by mobility */
97 pageblock_mt = get_pfnblock_migratetype(page, pfn);
98 page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
99 ret += snprintf(kbuf + ret, count - ret,
100 "PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n",
101 pfn,
102 pfn >> pageblock_order,
103 pageblock_mt,
104 pageblock_mt != page_mt ? "Fallback" : " ",
105 PageLocked(page) ? "K" : " ",
106 PageError(page) ? "E" : " ",
107 PageReferenced(page) ? "R" : " ",
108 PageUptodate(page) ? "U" : " ",
109 PageDirty(page) ? "D" : " ",
110 PageLRU(page) ? "L" : " ",
111 PageActive(page) ? "A" : " ",
112 PageSlab(page) ? "S" : " ",
113 PageWriteback(page) ? "W" : " ",
114 PageCompound(page) ? "C" : " ",
115 PageSwapCache(page) ? "B" : " ",
116 PageMappedToDisk(page) ? "M" : " ");
117
118 if (ret >= count)
119 goto err;
120
121 ret += snprint_stack_trace(kbuf + ret, count - ret,
122 &page_ext->trace, 0);
123 if (ret >= count)
124 goto err;
125
126 ret += snprintf(kbuf + ret, count - ret, "\n");
127 if (ret >= count)
128 goto err;
129
130 if (copy_to_user(buf, kbuf, ret))
131 ret = -EFAULT;
132
133 kfree(kbuf);
134 return ret;
135
136err:
137 kfree(kbuf);
138 return -ENOMEM;
139}
140
141static ssize_t
142read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
143{
144 unsigned long pfn;
145 struct page *page;
146 struct page_ext *page_ext;
147
148 if (!page_owner_inited)
149 return -EINVAL;
150
151 page = NULL;
152 pfn = min_low_pfn + *ppos;
153
154 /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
155 while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
156 pfn++;
157
158 drain_all_pages(NULL);
159
160 /* Find an allocated page */
161 for (; pfn < max_pfn; pfn++) {
162 /*
163 * If the new page is in a new MAX_ORDER_NR_PAGES area,
164 * validate the area as existing, skip it if not
165 */
166 if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
167 pfn += MAX_ORDER_NR_PAGES - 1;
168 continue;
169 }
170
171 /* Check for holes within a MAX_ORDER area */
172 if (!pfn_valid_within(pfn))
173 continue;
174
175 page = pfn_to_page(pfn);
176 if (PageBuddy(page)) {
177 unsigned long freepage_order = page_order_unsafe(page);
178
179 if (freepage_order < MAX_ORDER)
180 pfn += (1UL << freepage_order) - 1;
181 continue;
182 }
183
184 page_ext = lookup_page_ext(page);
185
186 /*
187 * Pages allocated before initialization of page_owner are
188 * non-buddy and have no page_owner info.
189 */
190 if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
191 continue;
192
193 /* Record the next PFN to read in the file offset */
194 *ppos = (pfn - min_low_pfn) + 1;
195
196 return print_page_owner(buf, count, pfn, page, page_ext);
197 }
198
199 return 0;
200}
201
/*
 * File operations of the "page_owner" debugfs file created by
 * pageowner_init(); only .read is provided.
 */
202static const struct file_operations proc_page_owner_operations = {
203 .read = read_page_owner,
204};
205
206static int __init pageowner_init(void)
207{
208 struct dentry *dentry;
209
210 if (!page_owner_inited) {
211 pr_info("page_owner is disabled\n");
212 return 0;
213 }
214
215 dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
216 NULL, &proc_page_owner_operations);
217 if (IS_ERR(dentry))
218 return PTR_ERR(dentry);
219
220 return 0;
221}
222module_init(pageowner_init)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1b12d390dc68..b090e9e3d626 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -22,6 +22,8 @@
22#include <linux/writeback.h> 22#include <linux/writeback.h>
23#include <linux/compaction.h> 23#include <linux/compaction.h>
24#include <linux/mm_inline.h> 24#include <linux/mm_inline.h>
25#include <linux/page_ext.h>
26#include <linux/page_owner.h>
25 27
26#include "internal.h" 28#include "internal.h"
27 29
@@ -1017,6 +1019,104 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1017 return 0; 1019 return 0;
1018} 1020}
1019 1021
1022#ifdef CONFIG_PAGE_OWNER
1023static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
1024 pg_data_t *pgdat,
1025 struct zone *zone)
1026{
1027 struct page *page;
1028 struct page_ext *page_ext;
1029 unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
1030 unsigned long end_pfn = pfn + zone->spanned_pages;
1031 unsigned long count[MIGRATE_TYPES] = { 0, };
1032 int pageblock_mt, page_mt;
1033 int i;
1034
1035 /* Scan block by block. First and last block may be incomplete */
1036 pfn = zone->zone_start_pfn;
1037
1038 /*
1039 * Walk the zone in pageblock_nr_pages steps. If a page block spans
1040 * a zone boundary, it will be double counted between zones. This does
1041 * not matter as the mixed block count will still be correct
1042 */
1043 for (; pfn < end_pfn; ) {
1044 if (!pfn_valid(pfn)) {
1045 pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
1046 continue;
1047 }
1048
1049 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
1050 block_end_pfn = min(block_end_pfn, end_pfn);
1051
1052 page = pfn_to_page(pfn);
1053 pageblock_mt = get_pfnblock_migratetype(page, pfn);
1054
1055 for (; pfn < block_end_pfn; pfn++) {
1056 if (!pfn_valid_within(pfn))
1057 continue;
1058
1059 page = pfn_to_page(pfn);
1060 if (PageBuddy(page)) {
1061 pfn += (1UL << page_order(page)) - 1;
1062 continue;
1063 }
1064
1065 if (PageReserved(page))
1066 continue;
1067
1068 page_ext = lookup_page_ext(page);
1069
1070 if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
1071 continue;
1072
1073 page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
1074 if (pageblock_mt != page_mt) {
1075 if (is_migrate_cma(pageblock_mt))
1076 count[MIGRATE_MOVABLE]++;
1077 else
1078 count[pageblock_mt]++;
1079
1080 pfn = block_end_pfn;
1081 break;
1082 }
1083 pfn += (1UL << page_ext->order) - 1;
1084 }
1085 }
1086
1087 /* Print counts */
1088 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1089 for (i = 0; i < MIGRATE_TYPES; i++)
1090 seq_printf(m, "%12lu ", count[i]);
1091 seq_putc(m, '\n');
1092}
1093#endif /* CONFIG_PAGE_OWNER */
1094
1095/*
1096 * Print out the number of pageblocks for each migratetype that contain pages
1097 * of other types. This gives an indication of how well fallbacks are being
1098 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
1099 * to determine what is going on
1100 */
1101static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1102{
1103#ifdef CONFIG_PAGE_OWNER
1104 int mtype;
1105
1106 if (!page_owner_inited)
1107 return;
1108
1109 drain_all_pages(NULL);
1110
1111 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1112 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1113 seq_printf(m, "%12s ", migratetype_names[mtype]);
1114 seq_putc(m, '\n');
1115
1116 walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
1117#endif /* CONFIG_PAGE_OWNER */
1118}
1119
1020/* 1120/*
1021 * This prints out statistics in relation to grouping pages by mobility. 1121 * This prints out statistics in relation to grouping pages by mobility.
1022 * It is expensive to collect so do not constantly read the file. 1122 * It is expensive to collect so do not constantly read the file.
@@ -1034,6 +1134,7 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
1034 seq_putc(m, '\n'); 1134 seq_putc(m, '\n');
1035 pagetypeinfo_showfree(m, pgdat); 1135 pagetypeinfo_showfree(m, pgdat);
1036 pagetypeinfo_showblockcount(m, pgdat); 1136 pagetypeinfo_showblockcount(m, pgdat);
1137 pagetypeinfo_showmixedcount(m, pgdat);
1037 1138
1038 return 0; 1139 return 0;
1039} 1140}