summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mm/Kconfig8
-rw-r--r--mm/Makefile1
-rw-r--r--mm/percpu-internal.h131
-rw-r--r--mm/percpu-km.c4
-rw-r--r--mm/percpu-stats.c222
-rw-r--r--mm/percpu-vm.c5
-rw-r--r--mm/percpu.c9
7 files changed, 380 insertions, 0 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index beb7a455915d..8fae42606d56 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS
706 bool 706 bool
707config ARCH_HAS_PKEYS 707config ARCH_HAS_PKEYS
708 bool 708 bool
709
710config PERCPU_STATS
711 bool "Collect percpu memory statistics"
712 default n
713 help
714 This feature collects and exposes statistics via debugfs. The
715 information includes global and per chunk statistics, which can
716 be used to help understand percpu memory usage.
diff --git a/mm/Makefile b/mm/Makefile
index 026f6a828a50..411bd24d4a7c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
103obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o 103obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
104obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o 104obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
105obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o 105obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
106obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 68bf18522a6d..d030fce745a2 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -5,6 +5,11 @@
5#include <linux/percpu.h> 5#include <linux/percpu.h>
6 6
7struct pcpu_chunk { 7struct pcpu_chunk {
8#ifdef CONFIG_PERCPU_STATS
9 int nr_alloc; /* # of allocations */
10 size_t max_alloc_size; /* largest allocation size */
11#endif
12
8 struct list_head list; /* linked to pcpu_slot lists */ 13 struct list_head list; /* linked to pcpu_slot lists */
9 int free_size; /* free bytes in the chunk */ 14 int free_size; /* free bytes in the chunk */
10 int contig_hint; /* max contiguous size hint */ 15 int contig_hint; /* max contiguous size hint */
@@ -18,6 +23,11 @@ struct pcpu_chunk {
18 void *data; /* chunk data */ 23 void *data; /* chunk data */
19 int first_free; /* no free below this */ 24 int first_free; /* no free below this */
20 bool immutable; /* no [de]population allowed */ 25 bool immutable; /* no [de]population allowed */
26 bool has_reserved; /* Indicates if chunk has reserved space
27 at the beginning. Reserved chunk will
28 contain reservation for static chunk.
29 Dynamic chunk will contain reservation
30 for static and reserved chunks. */
21 int nr_populated; /* # of populated pages */ 31 int nr_populated; /* # of populated pages */
22 unsigned long populated[]; /* populated bitmap */ 32 unsigned long populated[]; /* populated bitmap */
23}; 33};
@@ -30,4 +40,125 @@ extern int pcpu_nr_slots;
30extern struct pcpu_chunk *pcpu_first_chunk; 40extern struct pcpu_chunk *pcpu_first_chunk;
31extern struct pcpu_chunk *pcpu_reserved_chunk; 41extern struct pcpu_chunk *pcpu_reserved_chunk;
32 42
43#ifdef CONFIG_PERCPU_STATS
44
45#include <linux/spinlock.h>
46
47struct percpu_stats {
48 u64 nr_alloc; /* lifetime # of allocations */
49 u64 nr_dealloc; /* lifetime # of deallocations */
50 u64 nr_cur_alloc; /* current # of allocations */
51 u64 nr_max_alloc; /* max # of live allocations */
52 u32 nr_chunks; /* current # of live chunks */
53 u32 nr_max_chunks; /* max # of live chunks */
 54 size_t min_alloc_size; /* min allocation size */
55 size_t max_alloc_size; /* max allocation size */
56};
57
58extern struct percpu_stats pcpu_stats;
59extern struct pcpu_alloc_info pcpu_stats_ai;
60
61/*
62 * For debug purposes. We don't care about the flexible array.
63 */
64static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
65{
66 memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info));
67
68 /* initialize min_alloc_size to unit_size */
69 pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size;
70}
71
72/*
73 * pcpu_stats_area_alloc - increment area allocation stats
74 * @chunk: the location of the area being allocated
75 * @size: size of area to allocate in bytes
76 *
77 * CONTEXT:
78 * pcpu_lock.
79 */
80static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
81{
82 lockdep_assert_held(&pcpu_lock);
83
84 pcpu_stats.nr_alloc++;
85 pcpu_stats.nr_cur_alloc++;
86 pcpu_stats.nr_max_alloc =
87 max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc);
88 pcpu_stats.min_alloc_size =
89 min(pcpu_stats.min_alloc_size, size);
90 pcpu_stats.max_alloc_size =
91 max(pcpu_stats.max_alloc_size, size);
92
93 chunk->nr_alloc++;
94 chunk->max_alloc_size = max(chunk->max_alloc_size, size);
95}
96
97/*
98 * pcpu_stats_area_dealloc - decrement allocation stats
99 * @chunk: the location of the area being deallocated
100 *
101 * CONTEXT:
102 * pcpu_lock.
103 */
104static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
105{
106 lockdep_assert_held(&pcpu_lock);
107
108 pcpu_stats.nr_dealloc++;
109 pcpu_stats.nr_cur_alloc--;
110
111 chunk->nr_alloc--;
112}
113
114/*
115 * pcpu_stats_chunk_alloc - increment chunk stats
116 */
117static inline void pcpu_stats_chunk_alloc(void)
118{
119 spin_lock_irq(&pcpu_lock);
120
121 pcpu_stats.nr_chunks++;
122 pcpu_stats.nr_max_chunks =
123 max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks);
124
125 spin_unlock_irq(&pcpu_lock);
126}
127
128/*
129 * pcpu_stats_chunk_dealloc - decrement chunk stats
130 */
131static inline void pcpu_stats_chunk_dealloc(void)
132{
133 spin_lock_irq(&pcpu_lock);
134
135 pcpu_stats.nr_chunks--;
136
137 spin_unlock_irq(&pcpu_lock);
138}
139
140#else
141
142static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
143{
144}
145
146static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
147{
148}
149
150static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
151{
152}
153
154static inline void pcpu_stats_chunk_alloc(void)
155{
156}
157
158static inline void pcpu_stats_chunk_dealloc(void)
159{
160}
161
162#endif /* !CONFIG_PERCPU_STATS */
163
33#endif 164#endif
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d66911ff42d9..3bbfa0c9d069 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -72,6 +72,8 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
72 pcpu_chunk_populated(chunk, 0, nr_pages); 72 pcpu_chunk_populated(chunk, 0, nr_pages);
73 spin_unlock_irq(&pcpu_lock); 73 spin_unlock_irq(&pcpu_lock);
74 74
75 pcpu_stats_chunk_alloc();
76
75 return chunk; 77 return chunk;
76} 78}
77 79
@@ -79,6 +81,8 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
79{ 81{
80 const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; 82 const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
81 83
84 pcpu_stats_chunk_dealloc();
85
82 if (chunk && chunk->data) 86 if (chunk && chunk->data)
83 __free_pages(chunk->data, order_base_2(nr_pages)); 87 __free_pages(chunk->data, order_base_2(nr_pages));
84 pcpu_free_chunk(chunk); 88 pcpu_free_chunk(chunk);
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
new file mode 100644
index 000000000000..03524a56eeff
--- /dev/null
+++ b/mm/percpu-stats.c
@@ -0,0 +1,222 @@
1/*
 2 * mm/percpu-stats.c
3 *
4 * Copyright (C) 2017 Facebook Inc.
5 * Copyright (C) 2017 Dennis Zhou <dennisz@fb.com>
6 *
7 * This file is released under the GPLv2.
8 *
9 * Prints statistics about the percpu allocator and backing chunks.
10 */
11#include <linux/debugfs.h>
12#include <linux/list.h>
13#include <linux/percpu.h>
14#include <linux/seq_file.h>
15#include <linux/sort.h>
16#include <linux/vmalloc.h>
17
18#include "percpu-internal.h"
19
20#define P(X, Y) \
21 seq_printf(m, " %-24s: %8lld\n", X, (long long int)Y)
22
23struct percpu_stats pcpu_stats;
24struct pcpu_alloc_info pcpu_stats_ai;
25
26static int cmpint(const void *a, const void *b)
27{
28 return *(int *)a - *(int *)b;
29}
30
31/*
32 * Iterates over all chunks to find the max # of map entries used.
33 */
34static int find_max_map_used(void)
35{
36 struct pcpu_chunk *chunk;
37 int slot, max_map_used;
38
39 max_map_used = 0;
40 for (slot = 0; slot < pcpu_nr_slots; slot++)
41 list_for_each_entry(chunk, &pcpu_slot[slot], list)
42 max_map_used = max(max_map_used, chunk->map_used);
43
44 return max_map_used;
45}
46
47/*
48 * Prints out chunk state. Fragmentation is considered between
49 * the beginning of the chunk to the last allocation.
50 */
51static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
52 void *buffer)
53{
54 int i, s_index, last_alloc, alloc_sign, as_len;
55 int *alloc_sizes, *p;
56 /* statistics */
57 int sum_frag = 0, max_frag = 0;
58 int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0;
59
60 alloc_sizes = buffer;
61 s_index = chunk->has_reserved ? 1 : 0;
62
63 /* find last allocation */
64 last_alloc = -1;
65 for (i = chunk->map_used - 1; i >= s_index; i--) {
66 if (chunk->map[i] & 1) {
67 last_alloc = i;
68 break;
69 }
70 }
71
72 /* if the chunk is not empty - ignoring reserve */
73 if (last_alloc >= s_index) {
74 as_len = last_alloc + 1 - s_index;
75
76 /*
77 * Iterate through chunk map computing size info.
78 * The first bit is overloaded to be a used flag.
79 * negative = free space, positive = allocated
80 */
81 for (i = 0, p = chunk->map + s_index; i < as_len; i++, p++) {
82 alloc_sign = (*p & 1) ? 1 : -1;
83 alloc_sizes[i] = alloc_sign *
84 ((p[1] & ~1) - (p[0] & ~1));
85 }
86
87 sort(alloc_sizes, as_len, sizeof(chunk->map[0]), cmpint, NULL);
88
 89 /* Iterate through the unallocated fragments. */
90 for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) {
91 sum_frag -= *p;
92 max_frag = max(max_frag, -1 * (*p));
93 }
94
95 cur_min_alloc = alloc_sizes[i];
96 cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2];
97 cur_max_alloc = alloc_sizes[as_len - 1];
98 }
99
100 P("nr_alloc", chunk->nr_alloc);
101 P("max_alloc_size", chunk->max_alloc_size);
102 P("free_size", chunk->free_size);
103 P("contig_hint", chunk->contig_hint);
104 P("sum_frag", sum_frag);
105 P("max_frag", max_frag);
106 P("cur_min_alloc", cur_min_alloc);
107 P("cur_med_alloc", cur_med_alloc);
108 P("cur_max_alloc", cur_max_alloc);
109 seq_putc(m, '\n');
110}
111
112static int percpu_stats_show(struct seq_file *m, void *v)
113{
114 struct pcpu_chunk *chunk;
115 int slot, max_map_used;
116 void *buffer;
117
118alloc_buffer:
119 spin_lock_irq(&pcpu_lock);
120 max_map_used = find_max_map_used();
121 spin_unlock_irq(&pcpu_lock);
122
123 buffer = vmalloc(max_map_used * sizeof(pcpu_first_chunk->map[0]));
124 if (!buffer)
125 return -ENOMEM;
126
127 spin_lock_irq(&pcpu_lock);
128
129 /* if the buffer allocated earlier is too small */
130 if (max_map_used < find_max_map_used()) {
131 spin_unlock_irq(&pcpu_lock);
132 vfree(buffer);
133 goto alloc_buffer;
134 }
135
136#define PL(X) \
137 seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X)
138
139 seq_printf(m,
140 "Percpu Memory Statistics\n"
141 "Allocation Info:\n"
142 "----------------------------------------\n");
143 PL(unit_size);
144 PL(static_size);
145 PL(reserved_size);
146 PL(dyn_size);
147 PL(atom_size);
148 PL(alloc_size);
149 seq_putc(m, '\n');
150
151#undef PL
152
153#define PU(X) \
154 seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X)
155
156 seq_printf(m,
157 "Global Stats:\n"
158 "----------------------------------------\n");
159 PU(nr_alloc);
160 PU(nr_dealloc);
161 PU(nr_cur_alloc);
162 PU(nr_max_alloc);
163 PU(nr_chunks);
164 PU(nr_max_chunks);
165 PU(min_alloc_size);
166 PU(max_alloc_size);
167 seq_putc(m, '\n');
168
169#undef PU
170
171 seq_printf(m,
172 "Per Chunk Stats:\n"
173 "----------------------------------------\n");
174
175 if (pcpu_reserved_chunk) {
176 seq_puts(m, "Chunk: <- Reserved Chunk\n");
177 chunk_map_stats(m, pcpu_reserved_chunk, buffer);
178 }
179
180 for (slot = 0; slot < pcpu_nr_slots; slot++) {
181 list_for_each_entry(chunk, &pcpu_slot[slot], list) {
182 if (chunk == pcpu_first_chunk) {
183 seq_puts(m, "Chunk: <- First Chunk\n");
184 chunk_map_stats(m, chunk, buffer);
185
186
187 } else {
188 seq_puts(m, "Chunk:\n");
189 chunk_map_stats(m, chunk, buffer);
190 }
191
192 }
193 }
194
195 spin_unlock_irq(&pcpu_lock);
196
197 vfree(buffer);
198
199 return 0;
200}
201
202static int percpu_stats_open(struct inode *inode, struct file *filp)
203{
204 return single_open(filp, percpu_stats_show, NULL);
205}
206
207static const struct file_operations percpu_stats_fops = {
208 .open = percpu_stats_open,
209 .read = seq_read,
210 .llseek = seq_lseek,
211 .release = single_release,
212};
213
214static int __init init_percpu_stats_debugfs(void)
215{
216 debugfs_create_file("percpu_stats", 0444, NULL, NULL,
217 &percpu_stats_fops);
218
219 return 0;
220}
221
222late_initcall(init_percpu_stats_debugfs);
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9ac639499bd1..5915a224da52 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -343,11 +343,16 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
343 343
344 chunk->data = vms; 344 chunk->data = vms;
345 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; 345 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
346
347 pcpu_stats_chunk_alloc();
348
346 return chunk; 349 return chunk;
347} 350}
348 351
349static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) 352static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
350{ 353{
354 pcpu_stats_chunk_dealloc();
355
351 if (chunk && chunk->data) 356 if (chunk && chunk->data)
352 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); 357 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
353 pcpu_free_chunk(chunk); 358 pcpu_free_chunk(chunk);
diff --git a/mm/percpu.c b/mm/percpu.c
index 75ac982c19df..44a1cadf74a7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -657,6 +657,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
657 int *p; 657 int *p;
658 658
659 lockdep_assert_held(&pcpu_lock); 659 lockdep_assert_held(&pcpu_lock);
660 pcpu_stats_area_dealloc(chunk);
660 661
661 freeme |= 1; /* we are searching for <given offset, in use> pair */ 662 freeme |= 1; /* we are searching for <given offset, in use> pair */
662 663
@@ -721,6 +722,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
721 chunk->map[0] = 0; 722 chunk->map[0] = 0;
722 chunk->map[1] = pcpu_unit_size | 1; 723 chunk->map[1] = pcpu_unit_size | 1;
723 chunk->map_used = 1; 724 chunk->map_used = 1;
725 chunk->has_reserved = false;
724 726
725 INIT_LIST_HEAD(&chunk->list); 727 INIT_LIST_HEAD(&chunk->list);
726 INIT_LIST_HEAD(&chunk->map_extend_list); 728 INIT_LIST_HEAD(&chunk->map_extend_list);
@@ -970,6 +972,7 @@ restart:
970 goto restart; 972 goto restart;
971 973
972area_found: 974area_found:
975 pcpu_stats_area_alloc(chunk, size);
973 spin_unlock_irqrestore(&pcpu_lock, flags); 976 spin_unlock_irqrestore(&pcpu_lock, flags);
974 977
975 /* populate if not all pages are already there */ 978 /* populate if not all pages are already there */
@@ -1642,6 +1645,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1642 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + 1645 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
1643 BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); 1646 BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);
1644 1647
1648 pcpu_stats_save_ai(ai);
1649
1645 /* 1650 /*
1646 * Allocate chunk slots. The additional last slot is for 1651 * Allocate chunk slots. The additional last slot is for
1647 * empty chunks. 1652 * empty chunks.
@@ -1685,6 +1690,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1685 if (schunk->free_size) 1690 if (schunk->free_size)
1686 schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size; 1691 schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size;
1687 schunk->map[schunk->map_used] |= 1; 1692 schunk->map[schunk->map_used] |= 1;
1693 schunk->has_reserved = true;
1688 1694
1689 /* init dynamic chunk if necessary */ 1695 /* init dynamic chunk if necessary */
1690 if (dyn_size) { 1696 if (dyn_size) {
@@ -1703,6 +1709,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1703 dchunk->map[1] = pcpu_reserved_chunk_limit; 1709 dchunk->map[1] = pcpu_reserved_chunk_limit;
1704 dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; 1710 dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
1705 dchunk->map_used = 2; 1711 dchunk->map_used = 2;
1712 dchunk->has_reserved = true;
1706 } 1713 }
1707 1714
1708 /* link the first chunk in */ 1715 /* link the first chunk in */
@@ -1711,6 +1718,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1711 pcpu_count_occupied_pages(pcpu_first_chunk, 1); 1718 pcpu_count_occupied_pages(pcpu_first_chunk, 1);
1712 pcpu_chunk_relocate(pcpu_first_chunk, -1); 1719 pcpu_chunk_relocate(pcpu_first_chunk, -1);
1713 1720
1721 pcpu_stats_chunk_alloc();
1722
1714 /* we're done */ 1723 /* we're done */
1715 pcpu_base_addr = base_addr; 1724 pcpu_base_addr = base_addr;
1716 return 0; 1725 return 0;