aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoonsoo Kim <iamjoonsoo.kim@lge.com>2016-03-17 17:19:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 18:09:34 -0400
commit95813b8faa0cd315f61a8b9d9c523792370b693e (patch)
treecc60847b726877919d3da68f7af8b32f8a1d8d2a
parentfe896d1878949ea92ba547587bc3075cc688fb8f (diff)
mm/page_ref: add tracepoint to track down page reference manipulation
CMA allocation should be guaranteed to succeed by definition, but, unfortunately, it would be failed sometimes. It is hard to track down the problem, because it is related to page reference manipulation and we don't have any facility to analyze it. This patch adds tracepoints to track down page reference manipulation. With it, we can find exact reason of failure and can fix the problem. Following is an example of tracepoint output. (note: this example is stale version that printing flags as the number. Recent version will print it as human readable string.) <...>-9018 [004] 92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 count=1 mapcount=0 mapping=(nil) mt=4 val=1 <...>-9018 [004] 92.678378: kernel_stack: => get_page_from_freelist (ffffffff81176659) => __alloc_pages_nodemask (ffffffff81176d22) => alloc_pages_vma (ffffffff811bf675) => handle_mm_fault (ffffffff8119e693) => __do_page_fault (ffffffff810631ea) => trace_do_page_fault (ffffffff81063543) => do_async_page_fault (ffffffff8105c40a) => async_page_fault (ffffffff817581d8) [snip] <...>-9018 [004] 92.678379: page_ref_mod: pfn=0x17ac9 flags=0x40048 count=2 mapcount=1 mapping=0xffff880015a78dc1 mt=4 val=1 [snip] ... ... <...>-9131 [001] 93.174468: test_pages_isolated: start_pfn=0x17800 end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail [snip] <...>-9018 [004] 93.174843: page_ref_mod_and_test: pfn=0x17ac9 flags=0x40068 count=0 mapcount=0 mapping=0xffff880015a78dc1 mt=4 val=-1 ret=1 => release_pages (ffffffff8117c9e4) => free_pages_and_swap_cache (ffffffff811b0697) => tlb_flush_mmu_free (ffffffff81199616) => tlb_finish_mmu (ffffffff8119a62c) => exit_mmap (ffffffff811a53f7) => mmput (ffffffff81073f47) => do_exit (ffffffff810794e9) => do_group_exit (ffffffff81079def) => SyS_exit_group (ffffffff81079e74) => entry_SYSCALL_64_fastpath (ffffffff817560b6) This output shows that problem comes from exit path. In exit path, to improve performance, pages are not freed immediately. They are gathered and processed by batch. 
During this process, migration is not possible and the CMA allocation fails. This problem is hard to find without this page reference tracepoint facility. Enabling this feature bloats the kernel text by about 30 KB in my configuration. text data bss dec hex filename 12127327 2243616 1507328 15878271 f2487f vmlinux_disabled 12157208 2258880 1507328 15923416 f2f8d8 vmlinux_enabled Note that, due to a header file dependency problem between mm.h and tracepoint.h, this feature has to open code the static key functions for tracepoints. Proposed by Steven Rostedt at the following link. https://lkml.org/lkml/2015/12/9/699 [arnd@arndb.de: crypto/async_pq: use __free_page() instead of put_page()] [iamjoonsoo.kim@lge.com: fix build failure for xtensa] [akpm@linux-foundation.org: tweak Kconfig text, per Vlastimil] Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com> Acked-by: Michal Nazarewicz <mina86@mina86.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Minchan Kim <minchan@kernel.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> Acked-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--crypto/async_tx/async_pq.c2
-rw-r--r--include/linux/page_ref.h98
-rw-r--r--include/trace/events/page_ref.h134
-rw-r--r--mm/Kconfig.debug13
-rw-r--r--mm/Makefile1
-rw-r--r--mm/debug_page_ref.c54
6 files changed, 296 insertions, 6 deletions
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index c0748bbd4c08..08b3ac68952b 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -444,7 +444,7 @@ static int __init async_pq_init(void)
444 444
445static void __exit async_pq_exit(void) 445static void __exit async_pq_exit(void)
446{ 446{
447 put_page(pq_scribble_page); 447 __free_page(pq_scribble_page);
448} 448}
449 449
450module_init(async_pq_init); 450module_init(async_pq_init);
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 30f5817f6b8e..e596d5d9540e 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -4,6 +4,62 @@
4#include <linux/atomic.h> 4#include <linux/atomic.h>
5#include <linux/mm_types.h> 5#include <linux/mm_types.h>
6#include <linux/page-flags.h> 6#include <linux/page-flags.h>
7#include <linux/tracepoint-defs.h>
8
9extern struct tracepoint __tracepoint_page_ref_set;
10extern struct tracepoint __tracepoint_page_ref_mod;
11extern struct tracepoint __tracepoint_page_ref_mod_and_test;
12extern struct tracepoint __tracepoint_page_ref_mod_and_return;
13extern struct tracepoint __tracepoint_page_ref_mod_unless;
14extern struct tracepoint __tracepoint_page_ref_freeze;
15extern struct tracepoint __tracepoint_page_ref_unfreeze;
16
17#ifdef CONFIG_DEBUG_PAGE_REF
18
19/*
20 * Ideally we would want to use the trace_<tracepoint>_enabled() helper
21 * functions. But due to include header file issues, that is not
22 * feasible. Instead we have to open code the static key functions.
23 *
24 * See trace_##name##_enabled(void) in include/linux/tracepoint.h
25 */
26#define page_ref_tracepoint_active(t) static_key_false(&(t).key)
27
28extern void __page_ref_set(struct page *page, int v);
29extern void __page_ref_mod(struct page *page, int v);
30extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
31extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
32extern void __page_ref_mod_unless(struct page *page, int v, int u);
33extern void __page_ref_freeze(struct page *page, int v, int ret);
34extern void __page_ref_unfreeze(struct page *page, int v);
35
36#else
37
38#define page_ref_tracepoint_active(t) false
39
40static inline void __page_ref_set(struct page *page, int v)
41{
42}
43static inline void __page_ref_mod(struct page *page, int v)
44{
45}
46static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
47{
48}
49static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
50{
51}
52static inline void __page_ref_mod_unless(struct page *page, int v, int u)
53{
54}
55static inline void __page_ref_freeze(struct page *page, int v, int ret)
56{
57}
58static inline void __page_ref_unfreeze(struct page *page, int v)
59{
60}
61
62#endif
7 63
8static inline int page_ref_count(struct page *page) 64static inline int page_ref_count(struct page *page)
9{ 65{
@@ -18,6 +74,8 @@ static inline int page_count(struct page *page)
18static inline void set_page_count(struct page *page, int v) 74static inline void set_page_count(struct page *page, int v)
19{ 75{
20 atomic_set(&page->_count, v); 76 atomic_set(&page->_count, v);
77 if (page_ref_tracepoint_active(__tracepoint_page_ref_set))
78 __page_ref_set(page, v);
21} 79}
22 80
23/* 81/*
@@ -32,46 +90,74 @@ static inline void init_page_count(struct page *page)
32static inline void page_ref_add(struct page *page, int nr) 90static inline void page_ref_add(struct page *page, int nr)
33{ 91{
34 atomic_add(nr, &page->_count); 92 atomic_add(nr, &page->_count);
93 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
94 __page_ref_mod(page, nr);
35} 95}
36 96
37static inline void page_ref_sub(struct page *page, int nr) 97static inline void page_ref_sub(struct page *page, int nr)
38{ 98{
39 atomic_sub(nr, &page->_count); 99 atomic_sub(nr, &page->_count);
100 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
101 __page_ref_mod(page, -nr);
40} 102}
41 103
42static inline void page_ref_inc(struct page *page) 104static inline void page_ref_inc(struct page *page)
43{ 105{
44 atomic_inc(&page->_count); 106 atomic_inc(&page->_count);
107 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
108 __page_ref_mod(page, 1);
45} 109}
46 110
47static inline void page_ref_dec(struct page *page) 111static inline void page_ref_dec(struct page *page)
48{ 112{
49 atomic_dec(&page->_count); 113 atomic_dec(&page->_count);
114 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
115 __page_ref_mod(page, -1);
50} 116}
51 117
52static inline int page_ref_sub_and_test(struct page *page, int nr) 118static inline int page_ref_sub_and_test(struct page *page, int nr)
53{ 119{
54 return atomic_sub_and_test(nr, &page->_count); 120 int ret = atomic_sub_and_test(nr, &page->_count);
121
122 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
123 __page_ref_mod_and_test(page, -nr, ret);
124 return ret;
55} 125}
56 126
57static inline int page_ref_dec_and_test(struct page *page) 127static inline int page_ref_dec_and_test(struct page *page)
58{ 128{
59 return atomic_dec_and_test(&page->_count); 129 int ret = atomic_dec_and_test(&page->_count);
130
131 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test))
132 __page_ref_mod_and_test(page, -1, ret);
133 return ret;
60} 134}
61 135
62static inline int page_ref_dec_return(struct page *page) 136static inline int page_ref_dec_return(struct page *page)
63{ 137{
64 return atomic_dec_return(&page->_count); 138 int ret = atomic_dec_return(&page->_count);
139
140 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
141 __page_ref_mod_and_return(page, -1, ret);
142 return ret;
65} 143}
66 144
67static inline int page_ref_add_unless(struct page *page, int nr, int u) 145static inline int page_ref_add_unless(struct page *page, int nr, int u)
68{ 146{
69 return atomic_add_unless(&page->_count, nr, u); 147 int ret = atomic_add_unless(&page->_count, nr, u);
148
149 if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless))
150 __page_ref_mod_unless(page, nr, ret);
151 return ret;
70} 152}
71 153
72static inline int page_ref_freeze(struct page *page, int count) 154static inline int page_ref_freeze(struct page *page, int count)
73{ 155{
74 return likely(atomic_cmpxchg(&page->_count, count, 0) == count); 156 int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count);
157
158 if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze))
159 __page_ref_freeze(page, count, ret);
160 return ret;
75} 161}
76 162
77static inline void page_ref_unfreeze(struct page *page, int count) 163static inline void page_ref_unfreeze(struct page *page, int count)
@@ -80,6 +166,8 @@ static inline void page_ref_unfreeze(struct page *page, int count)
80 VM_BUG_ON(count == 0); 166 VM_BUG_ON(count == 0);
81 167
82 atomic_set(&page->_count, count); 168 atomic_set(&page->_count, count);
169 if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
170 __page_ref_unfreeze(page, count);
83} 171}
84 172
85#endif 173#endif
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
new file mode 100644
index 000000000000..81001f8b0db4
--- /dev/null
+++ b/include/trace/events/page_ref.h
@@ -0,0 +1,134 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM page_ref
3
4#if !defined(_TRACE_PAGE_REF_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_PAGE_REF_H
6
7#include <linux/types.h>
8#include <linux/page_ref.h>
9#include <linux/tracepoint.h>
10#include <trace/events/mmflags.h>
11
12DECLARE_EVENT_CLASS(page_ref_mod_template,
13
14 TP_PROTO(struct page *page, int v),
15
16 TP_ARGS(page, v),
17
18 TP_STRUCT__entry(
19 __field(unsigned long, pfn)
20 __field(unsigned long, flags)
21 __field(int, count)
22 __field(int, mapcount)
23 __field(void *, mapping)
24 __field(int, mt)
25 __field(int, val)
26 ),
27
28 TP_fast_assign(
29 __entry->pfn = page_to_pfn(page);
30 __entry->flags = page->flags;
31 __entry->count = page_ref_count(page);
32 __entry->mapcount = page_mapcount(page);
33 __entry->mapping = page->mapping;
34 __entry->mt = get_pageblock_migratetype(page);
35 __entry->val = v;
36 ),
37
38 TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d",
39 __entry->pfn,
40 show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
41 __entry->count,
42 __entry->mapcount, __entry->mapping, __entry->mt,
43 __entry->val)
44);
45
46DEFINE_EVENT(page_ref_mod_template, page_ref_set,
47
48 TP_PROTO(struct page *page, int v),
49
50 TP_ARGS(page, v)
51);
52
53DEFINE_EVENT(page_ref_mod_template, page_ref_mod,
54
55 TP_PROTO(struct page *page, int v),
56
57 TP_ARGS(page, v)
58);
59
60DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
61
62 TP_PROTO(struct page *page, int v, int ret),
63
64 TP_ARGS(page, v, ret),
65
66 TP_STRUCT__entry(
67 __field(unsigned long, pfn)
68 __field(unsigned long, flags)
69 __field(int, count)
70 __field(int, mapcount)
71 __field(void *, mapping)
72 __field(int, mt)
73 __field(int, val)
74 __field(int, ret)
75 ),
76
77 TP_fast_assign(
78 __entry->pfn = page_to_pfn(page);
79 __entry->flags = page->flags;
80 __entry->count = page_ref_count(page);
81 __entry->mapcount = page_mapcount(page);
82 __entry->mapping = page->mapping;
83 __entry->mt = get_pageblock_migratetype(page);
84 __entry->val = v;
85 __entry->ret = ret;
86 ),
87
88 TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d ret=%d",
89 __entry->pfn,
90 show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
91 __entry->count,
92 __entry->mapcount, __entry->mapping, __entry->mt,
93 __entry->val, __entry->ret)
94);
95
96DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_test,
97
98 TP_PROTO(struct page *page, int v, int ret),
99
100 TP_ARGS(page, v, ret)
101);
102
103DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_return,
104
105 TP_PROTO(struct page *page, int v, int ret),
106
107 TP_ARGS(page, v, ret)
108);
109
110DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_unless,
111
112 TP_PROTO(struct page *page, int v, int ret),
113
114 TP_ARGS(page, v, ret)
115);
116
117DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_freeze,
118
119 TP_PROTO(struct page *page, int v, int ret),
120
121 TP_ARGS(page, v, ret)
122);
123
124DEFINE_EVENT(page_ref_mod_template, page_ref_unfreeze,
125
126 TP_PROTO(struct page *page, int v),
127
128 TP_ARGS(page, v)
129);
130
131#endif /* _TRACE_PAGE_COUNT_H */
132
133/* This part must be outside protection */
134#include <trace/define_trace.h>
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 5c50b238b770..22f4cd96acb0 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -79,3 +79,16 @@ config PAGE_POISONING_ZERO
79 Enabling page poisoning with this option will disable hibernation 79 Enabling page poisoning with this option will disable hibernation
80 80
81 If unsure, say N 81 If unsure, say N

config DEBUG_PAGE_REF
	bool "Enable tracepoint to track down page reference manipulation"
	depends on DEBUG_KERNEL
	depends on TRACEPOINTS
	---help---
	  This is a feature to add tracepoint for tracking down page reference
	  manipulation. This tracking is useful to diagnose functional failure
	  due to migration failures caused by page reference mismatches. Be
	  careful when enabling this feature because it adds about 30 KB to the
	  kernel code. However the runtime performance overhead is virtually
	  nil until the tracepoints are actually enabled.
diff --git a/mm/Makefile b/mm/Makefile
index cfdd481d27a5..6da300a1414b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -81,3 +81,4 @@ obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
81obj-$(CONFIG_USERFAULTFD) += userfaultfd.o 81obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
82obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o 82obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
83obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o 83obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
84obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
new file mode 100644
index 000000000000..1aef3d562e52
--- /dev/null
+++ b/mm/debug_page_ref.c
@@ -0,0 +1,54 @@
1#include <linux/mm_types.h>
2#include <linux/tracepoint.h>
3
4#define CREATE_TRACE_POINTS
5#include <trace/events/page_ref.h>
6
7void __page_ref_set(struct page *page, int v)
8{
9 trace_page_ref_set(page, v);
10}
11EXPORT_SYMBOL(__page_ref_set);
12EXPORT_TRACEPOINT_SYMBOL(page_ref_set);
13
14void __page_ref_mod(struct page *page, int v)
15{
16 trace_page_ref_mod(page, v);
17}
18EXPORT_SYMBOL(__page_ref_mod);
19EXPORT_TRACEPOINT_SYMBOL(page_ref_mod);
20
21void __page_ref_mod_and_test(struct page *page, int v, int ret)
22{
23 trace_page_ref_mod_and_test(page, v, ret);
24}
25EXPORT_SYMBOL(__page_ref_mod_and_test);
26EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_test);
27
28void __page_ref_mod_and_return(struct page *page, int v, int ret)
29{
30 trace_page_ref_mod_and_return(page, v, ret);
31}
32EXPORT_SYMBOL(__page_ref_mod_and_return);
33EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_return);
34
35void __page_ref_mod_unless(struct page *page, int v, int u)
36{
37 trace_page_ref_mod_unless(page, v, u);
38}
39EXPORT_SYMBOL(__page_ref_mod_unless);
40EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_unless);
41
42void __page_ref_freeze(struct page *page, int v, int ret)
43{
44 trace_page_ref_freeze(page, v, ret);
45}
46EXPORT_SYMBOL(__page_ref_freeze);
47EXPORT_TRACEPOINT_SYMBOL(page_ref_freeze);
48
49void __page_ref_unfreeze(struct page *page, int v)
50{
51 trace_page_ref_unfreeze(page, v);
52}
53EXPORT_SYMBOL(__page_ref_unfreeze);
54EXPORT_TRACEPOINT_SYMBOL(page_ref_unfreeze);