path: root/drivers/gpu/nvgpu/common/linux/debug_kmem.c
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/debug_kmem.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/debug_kmem.c	315
1 file changed, 315 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_kmem.h"
#include "kmem_priv.h"
#include "gk20a/platform_gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/**
 * __to_human_readable_bytes - Determine suffix for passed size.
 *
 * @bytes - Number of bytes to generate a suffix for.
 * @hr_bytes [out] - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * scaled number of bytes is then passed back in @hr_bytes. This returns the
 * following ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
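 *
 * For example, 1536 bytes decomposes to 1 KB and (3 << 20) bytes to 3 MB;
 * the scaling uses integer shifts, so any remainder below the chosen unit
 * is dropped.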
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
				      const char **hr_suffix)
{
	static const char *suffixes[] =
		{ "B", "KB", "MB", "GB", "TB", "PB" };

	u64 suffix_ind = 0;

	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
		bytes >>= 10;
		suffix_ind++;
	}

	/*
	 * Handle case where bytes > 1023PB.
	 */
	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
		suffix_ind : ARRAY_SIZE(suffixes) - 1;

	*hr_bytes = bytes;
	*hr_suffix = suffixes[suffix_ind];
}

/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed number
 * of bytes.
 *
 * If @s is NULL then the prints are made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
	u64 hr_bytes;
	const char *hr_suffix;

	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}

/**
 * print_histogram - Build a histogram of the memory usage.
 *
 * @tracker - The tracker to pull data from.
 * @s - A seq_file to dump info into.
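 *
 * Each bucket line shows the bucket's size range, its suffix, the number of
 * allocs that landed in it and a proportional bar, e.g. (illustrative values
 * only):
 *
 *    [2    4   ] KB     5 | ++++++++++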
 */
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	int i;
	u64 pot_min, pot_max;
	u64 nr_buckets;
	unsigned int *buckets;
	unsigned int total_allocs;
	struct nvgpu_rbtree_node *node;
	static const char histogram_line[] =
		"++++++++++++++++++++++++++++++++++++++++";

	/*
	 * pot_min is tracker->min_alloc rounded down to the nearest power of
	 * two; it marks the start of the histogram. pot_max is
	 * tracker->max_alloc rounded up to the nearest power of two. Each
	 * histogram bucket spans one power of two, so the buckets grow
	 * exponentially.
	 */
	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);

	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
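	/*
	 * Worked example (illustrative numbers): min_alloc = 96 and
	 * max_alloc = 5000 give pot_min = 64 and pot_max = 8192, so
	 * nr_buckets = 13 - 6 = 7, covering [64, 128) up to [4096, 8192].
	 */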

	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
	if (!buckets) {
		__pstat(s, "OOM: could not allocate bucket storage!?\n");
		return;
	}

	/*
	 * Iterate across all of the allocs and determine what bucket they
	 * should go in. Round the size down to the nearest power of two to
	 * find the right bucket.
	 */
	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		int b;
		u64 bucket_min;
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
		if (bucket_min < tracker->min_alloc)
			bucket_min = tracker->min_alloc;

		b = __ffs(bucket_min) - __ffs(pot_min);
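		/*
		 * E.g. (illustrative): a 3000 byte alloc rounds down to
		 * 2048; with pot_min = 64 that gives b = 11 - 6 = 5, i.e.
		 * the [2048, 4096) bucket.
		 */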

		/*
		 * Handle the one case where there's an alloc exactly as big as
		 * the maximum bucket size of the largest bucket. Most of the
		 * buckets have an inclusive minimum and exclusive maximum. But
		 * the largest bucket needs to have an _inclusive_ maximum as
		 * well.
		 */
		if (b == (int)nr_buckets)
			b--;

		buckets[b]++;

		nvgpu_rbtree_enum_next(&node, node);
	}

	total_allocs = 0;
	for (i = 0; i < (int)nr_buckets; i++)
		total_allocs += buckets[i];

	__pstat(s, "Alloc histogram:\n");

	/*
	 * Actually compute the histogram lines.
	 */
	for (i = 0; i < (int)nr_buckets; i++) {
		char this_line[sizeof(histogram_line) + 1];
		u64 line_length;
		u64 hr_bytes;
		const char *hr_suffix;

		memset(this_line, 0, sizeof(this_line));

		/*
		 * Compute the normalized line length. Can't use floating point
		 * so we will just multiply everything by 1000 and use fixed
		 * point.
		 */
		line_length = (1000 * buckets[i]) / total_allocs;
		line_length *= sizeof(histogram_line);
		line_length /= 1000;
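		/*
		 * E.g. buckets[i] = 5 out of total_allocs = 20 gives
		 * (1000 * 5) / 20 = 250, then 250 * sizeof(histogram_line)
		 * / 1000 = 250 * 41 / 1000 = 10 '+' characters, i.e. about
		 * a quarter of the full bar.
		 */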

		memset(this_line, '+', line_length);

		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
					  &hr_bytes, &hr_suffix);
		__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
			hr_bytes, hr_bytes << 1,
			hr_suffix, buckets[i], this_line);
	}
}

/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker - The tracker to pull data from.
 * @s - A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-NULL then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}

static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
	struct nvgpu_mem_alloc_tracker *tracker = s->private;

	nvgpu_kmem_print_stats(tracker, s);

	return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}

static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __kmem_traces_dump_tracker(struct gk20a *g,
				      struct nvgpu_mem_alloc_tracker *tracker,
				      struct seq_file *s)
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		kmem_print_mem_alloc(g, alloc, s);

		nvgpu_rbtree_enum_next(&node, node);
	}

	return 0;
}

static int __kmem_traces_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;

	nvgpu_lock_tracker(g->vmallocs);
	seq_puts(s, "Outstanding vmallocs:\n");
	__kmem_traces_dump_tracker(g, g->vmallocs, s);
	seq_puts(s, "\n");
	nvgpu_unlock_tracker(g->vmallocs);

	nvgpu_lock_tracker(g->kmallocs);
	seq_puts(s, "Outstanding kmallocs:\n");
	__kmem_traces_dump_tracker(g, g->kmallocs, s);
	nvgpu_unlock_tracker(g->kmallocs);

	return 0;
}

static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}

static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
	struct dentry *node;

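	/*
	 * Everything below lands in a "kmem_tracking" directory under this
	 * GPU's platform debugfs root: one file per tracker (named after the
	 * vmalloc and kmalloc trackers) plus a "traces" file that lists the
	 * outstanding allocations.
	 */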
	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
	if (IS_ERR_OR_NULL(g->debugfs_kmem))
		return;

	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
				   g->debugfs_kmem,
				   g->vmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
				   g->debugfs_kmem,
				   g->kmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file("traces", S_IRUGO,
				   g->debugfs_kmem,
				   g, &__kmem_traces_fops);
}
#endif