aboutsummaryrefslogtreecommitdiffstats
path: root/include/os/linux/debug_kmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'include/os/linux/debug_kmem.c')
-rw-r--r--include/os/linux/debug_kmem.c312
1 files changed, 312 insertions, 0 deletions
diff --git a/include/os/linux/debug_kmem.c b/include/os/linux/debug_kmem.c
new file mode 100644
index 0000000..a0c7d47
--- /dev/null
+++ b/include/os/linux/debug_kmem.c
@@ -0,0 +1,312 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
16
17#include "os_linux.h"
18#include "debug_kmem.h"
19#include "kmem_priv.h"
20
21/**
22 * to_human_readable_bytes - Determine suffix for passed size.
23 *
24 * @bytes - Number of bytes to generate a suffix for.
25 * @hr_bytes [out] - The human readable number of bytes.
26 * @hr_suffix [out] - The suffix for the HR number of bytes.
27 *
28 * Computes a human readable decomposition of the passed number of bytes. The
29 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
30 * number of bytes is then passed back in @hr_bytes. This returns the following
31 * ranges:
32 *
33 * 0 - 1023 B
34 * 1 - 1023 KB
35 * 1 - 1023 MB
36 * 1 - 1023 GB
37 * 1 - 1023 TB
38 * 1 - ... PB
39 */
40static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
41 const char **hr_suffix)
42{
43 static const char *suffixes[] =
44 { "B", "KB", "MB", "GB", "TB", "PB" };
45
46 u64 suffix_ind = 0;
47
48 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
49 bytes >>= 10;
50 suffix_ind++;
51 }
52
53 /*
54 * Handle case where bytes > 1023PB.
55 */
56 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
57 suffix_ind : ARRAY_SIZE(suffixes) - 1;
58
59 *hr_bytes = bytes;
60 *hr_suffix = suffixes[suffix_ind];
61}
62
63/**
64 * print_hr_bytes - Print human readable bytes
65 *
66 * @s - A seq_file to print to. May be NULL.
67 * @msg - A message to print before the bytes.
68 * @bytes - Number of bytes.
69 *
70 * Print @msg followed by the human readable decomposition of the passed number
71 * of bytes.
72 *
73 * If @s is NULL then this prints will be made to the kernel log.
74 */
75static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
76{
77 u64 hr_bytes;
78 const char *hr_suffix;
79
80 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
81 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
82}
83
84/**
85 * print_histogram - Build a histogram of the memory usage.
86 *
87 * @tracker The tracker to pull data from.
88 * @s A seq_file to dump info into.
89 */
90static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
91 struct seq_file *s)
92{
93 int i;
94 u64 pot_min, pot_max;
95 u64 nr_buckets;
96 unsigned int *buckets;
97 unsigned int total_allocs;
98 struct nvgpu_rbtree_node *node;
99 static const char histogram_line[] =
100 "++++++++++++++++++++++++++++++++++++++++";
101
102 /*
103 * pot_min is essentially a round down to the nearest power of 2. This
104 * is the start of the histogram. pot_max is just a round up to the
105 * nearest power of two. Each histogram bucket is one power of two so
106 * the histogram buckets are exponential.
107 */
108 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
109 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
110
111 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
112
113 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
114 if (!buckets) {
115 __pstat(s, "OOM: could not allocate bucket storage!?\n");
116 return;
117 }
118
119 /*
120 * Iterate across all of the allocs and determine what bucket they
121 * should go in. Round the size down to the nearest power of two to
122 * find the right bucket.
123 */
124 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
125 while (node) {
126 int b;
127 u64 bucket_min;
128 struct nvgpu_mem_alloc *alloc =
129 nvgpu_mem_alloc_from_rbtree_node(node);
130
131 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
132 if (bucket_min < tracker->min_alloc)
133 bucket_min = tracker->min_alloc;
134
135 b = __ffs(bucket_min) - __ffs(pot_min);
136
137 /*
138 * Handle the one case were there's an alloc exactly as big as
139 * the maximum bucket size of the largest bucket. Most of the
140 * buckets have an inclusive minimum and exclusive maximum. But
141 * the largest bucket needs to have an _inclusive_ maximum as
142 * well.
143 */
144 if (b == (int)nr_buckets)
145 b--;
146
147 buckets[b]++;
148
149 nvgpu_rbtree_enum_next(&node, node);
150 }
151
152 total_allocs = 0;
153 for (i = 0; i < (int)nr_buckets; i++)
154 total_allocs += buckets[i];
155
156 __pstat(s, "Alloc histogram:\n");
157
158 /*
159 * Actually compute the histogram lines.
160 */
161 for (i = 0; i < (int)nr_buckets; i++) {
162 char this_line[sizeof(histogram_line) + 1];
163 u64 line_length;
164 u64 hr_bytes;
165 const char *hr_suffix;
166
167 memset(this_line, 0, sizeof(this_line));
168
169 /*
170 * Compute the normalized line length. Cant use floating point
171 * so we will just multiply everything by 1000 and use fixed
172 * point.
173 */
174 line_length = (1000 * buckets[i]) / total_allocs;
175 line_length *= sizeof(histogram_line);
176 line_length /= 1000;
177
178 memset(this_line, '+', line_length);
179
180 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
181 &hr_bytes, &hr_suffix);
182 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
183 hr_bytes, hr_bytes << 1,
184 hr_suffix, buckets[i], this_line);
185 }
186}
187
188/**
189 * nvgpu_kmem_print_stats - Print kmem tracking stats.
190 *
191 * @tracker The tracker to pull data from.
192 * @s A seq_file to dump info into.
193 *
194 * Print stats from a tracker. If @s is non-null then seq_printf() will be
195 * used with @s. Otherwise the stats are pr_info()ed.
196 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
		struct seq_file *s)
{
	/*
	 * Hold the tracker lock for the whole dump so the counters and the
	 * histogram's rbtree walk see one consistent snapshot.
	 */
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, " Number of allocs %lld\n",
		tracker->nr_allocs);
	__pstat(s, " Number of frees %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
	print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
	print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
	print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
	/*
	 * NOTE(review): the "(real)" counters presumably track the
	 * allocator-rounded sizes rather than requested sizes — confirm
	 * against kmem_priv.h.
	 */
	print_hr_bytes(s, " Bytes allocated (real) ",
		tracker->bytes_alloced_real);
	print_hr_bytes(s, " Bytes freed (real) ",
		tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}
223
224static int __kmem_tracking_show(struct seq_file *s, void *unused)
225{
226 struct nvgpu_mem_alloc_tracker *tracker = s->private;
227
228 nvgpu_kmem_print_stats(tracker, s);
229
230 return 0;
231}
232
/*
 * Open callback: bind the tracker stashed in the inode's i_private to a
 * single_open seq_file so __kmem_tracking_show() can find it.
 */
static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}
237
/* File operations for the per-tracker stats debugfs nodes. */
static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
244
245static int __kmem_traces_dump_tracker(struct gk20a *g,
246 struct nvgpu_mem_alloc_tracker *tracker,
247 struct seq_file *s)
248{
249 struct nvgpu_rbtree_node *node;
250
251 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
252 while (node) {
253 struct nvgpu_mem_alloc *alloc =
254 nvgpu_mem_alloc_from_rbtree_node(node);
255
256 kmem_print_mem_alloc(g, alloc, s);
257
258 nvgpu_rbtree_enum_next(&node, node);
259 }
260
261 return 0;
262}
263
264static int __kmem_traces_show(struct seq_file *s, void *unused)
265{
266 struct gk20a *g = s->private;
267
268 nvgpu_lock_tracker(g->vmallocs);
269 seq_puts(s, "Oustanding vmallocs:\n");
270 __kmem_traces_dump_tracker(g, g->vmallocs, s);
271 seq_puts(s, "\n");
272 nvgpu_unlock_tracker(g->vmallocs);
273
274 nvgpu_lock_tracker(g->kmallocs);
275 seq_puts(s, "Oustanding kmallocs:\n");
276 __kmem_traces_dump_tracker(g, g->kmallocs, s);
277 nvgpu_unlock_tracker(g->kmallocs);
278
279 return 0;
280}
281
/*
 * Open callback: bind the gk20a stashed in the inode's i_private to a
 * single_open seq_file so __kmem_traces_show() can find it.
 */
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}
286
/* File operations for the combined "traces" debugfs node. */
static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
293
294void nvgpu_kmem_debugfs_init(struct gk20a *g)
295{
296 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
297 struct dentry *node;
298
299 l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
300 if (IS_ERR_OR_NULL(l->debugfs_kmem))
301 return;
302
303 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
304 l->debugfs_kmem,
305 g->vmallocs, &__kmem_tracking_fops);
306 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
307 l->debugfs_kmem,
308 g->kmallocs, &__kmem_tracking_fops);
309 node = debugfs_create_file("traces", S_IRUGO,
310 l->debugfs_kmem,
311 g, &__kmem_traces_fops);
312}