path: root/drivers/gpu/nvgpu/common/linux/debug_kmem.c
author     Deepak Nibade <dnibade@nvidia.com>                   2017-05-24 08:07:04 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-06-02 09:53:35 -0400
commit     6090a8a7ee347f92d806f104d3a0082208f5df64 (patch)
tree       74b0d7057ea1b112d7de41f1bbce5e212f1525de /drivers/gpu/nvgpu/common/linux/debug_kmem.c
parent     be7f22db8bc5bff131432a4f6d127ecc8ce5096d (diff)
gpu: nvgpu: move debugfs code to linux module
Since all debugfs code is Linux-specific, remove it from common code and
move it into the Linux module. Debugfs code is now divided into the
following module-specific files:
    common/linux/debug.c
    common/linux/debug_cde.c
    common/linux/debug_ce.c
    common/linux/debug_fifo.c
    common/linux/debug_gr.c
    common/linux/debug_mm.c
    common/linux/debug_allocator.c
    common/linux/debug_kmem.c
    common/linux/debug_pmu.c
    common/linux/debug_sched.c
Add corresponding header files for the above modules too, and compile all
of the above files only if CONFIG_DEBUG_FS is set.
Some more details of the changes made:
- Move and rename gk20a/debug_gk20a.c to common/linux/debug.c
- Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h
- Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call
  gk20a_init_debug_ops() directly from gm20b_init_hal()
- Update all debug APIs to receive struct gk20a as a parameter instead
  of a struct device pointer
- Update API gk20a_dmabuf_get_state() to receive a struct gk20a pointer
  instead of a struct device
- Include <nvgpu/debug.h> explicitly in all files where debug operations
  are used
- Remove "gk20a/platform_gk20a.h" include from HAL files which no longer
  need it
- Add new API gk20a_debug_deinit() to deinitialize debugfs and call it
  from gk20a_remove()
- Move API gk20a_debug_dump_all_channel_status_ramfc() to
  gk20a/fifo_gk20a.c
Jira NVGPU-62
Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1488902
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
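(Illustrative only: a minimal sketch of the signature change described in the
message above. gk20a_debug_deinit() and the struct gk20a parameter are named
in this commit; the "before" prototype shown here is an assumption for
illustration, not code from this patch.)

    /* Before: debug APIs received the Linux device pointer. */
    void gk20a_debug_dump(struct device *dev);

    /* After: debug APIs receive struct gk20a directly, so callers no
     * longer need the Linux-specific struct device. */
    void gk20a_debug_dump(struct gk20a *g);

    /* New API: tear down debugfs; called from gk20a_remove(). */
    void gk20a_debug_deinit(struct gk20a *g);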
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/debug_kmem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_kmem.c  315
1 files changed, 315 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_kmem.h"
#include "kmem_priv.h"
#include "gk20a/platform_gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/**
 * to_human_readable_bytes - Determine suffix for passed size.
 *
 * @bytes - Number of bytes to generate a suffix for.
 * @hr_bytes [out] - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
 * number of bytes is then passed back in @hr_bytes. This returns the following
 * ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
                                      const char **hr_suffix)
{
        static const char *suffixes[] =
                { "B", "KB", "MB", "GB", "TB", "PB" };

        u64 suffix_ind = 0;

        while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
                bytes >>= 10;
                suffix_ind++;
        }

        /*
         * Handle case where bytes > 1023PB.
         */
        suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
                suffix_ind : ARRAY_SIZE(suffixes) - 1;

        *hr_bytes = bytes;
        *hr_suffix = suffixes[suffix_ind];
}

/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed number
 * of bytes.
 *
 * If @s is NULL then the prints will be made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
        u64 hr_bytes;
        const char *hr_suffix;

        __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
        __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}

/**
 * print_histogram - Build a histogram of the memory usage.
 *
 * @tracker - The tracking to pull data from.
 * @s - A seq_file to dump info into.
 */
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
                            struct seq_file *s)
{
        int i;
        u64 pot_min, pot_max;
        u64 nr_buckets;
        unsigned int *buckets;
        unsigned int total_allocs;
        struct nvgpu_rbtree_node *node;
        static const char histogram_line[] =
                "++++++++++++++++++++++++++++++++++++++++";

        /*
         * pot_min is essentially a round down to the nearest power of 2. This
         * is the start of the histogram. pot_max is just a round up to the
         * nearest power of two. Each histogram bucket is one power of two so
         * the histogram buckets are exponential.
         */
        pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
        pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);

        nr_buckets = __ffs(pot_max) - __ffs(pot_min);

        buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
        if (!buckets) {
                __pstat(s, "OOM: could not allocate bucket storage!?\n");
                return;
        }

        /*
         * Iterate across all of the allocs and determine what bucket they
         * should go in. Round the size down to the nearest power of two to
         * find the right bucket.
         */
        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
        while (node) {
                int b;
                u64 bucket_min;
                struct nvgpu_mem_alloc *alloc =
                        nvgpu_mem_alloc_from_rbtree_node(node);

                bucket_min = (u64)rounddown_pow_of_two(alloc->size);
                if (bucket_min < tracker->min_alloc)
                        bucket_min = tracker->min_alloc;

                b = __ffs(bucket_min) - __ffs(pot_min);

                /*
                 * Handle the one case where there's an alloc exactly as big as
                 * the maximum bucket size of the largest bucket. Most of the
                 * buckets have an inclusive minimum and exclusive maximum. But
                 * the largest bucket needs to have an _inclusive_ maximum as
                 * well.
                 */
                if (b == (int)nr_buckets)
                        b--;

                buckets[b]++;

                nvgpu_rbtree_enum_next(&node, node);
        }

        total_allocs = 0;
        for (i = 0; i < (int)nr_buckets; i++)
                total_allocs += buckets[i];

        __pstat(s, "Alloc histogram:\n");

        /*
         * Actually compute the histogram lines.
         */
        for (i = 0; i < (int)nr_buckets; i++) {
                char this_line[sizeof(histogram_line) + 1];
                u64 line_length;
                u64 hr_bytes;
                const char *hr_suffix;

                memset(this_line, 0, sizeof(this_line));

                /*
                 * Compute the normalized line length. Can't use floating
                 * point so we will just multiply everything by 1000 and use
                 * fixed point. For example, a bucket holding 250 of 1000
                 * total allocs gives (1000 * 250 / 1000) * 41 / 1000, i.e.
                 * a bar of 10 '+' characters.
                 */
                line_length = (1000 * buckets[i]) / total_allocs;
                line_length *= sizeof(histogram_line);
                line_length /= 1000;

                memset(this_line, '+', line_length);

                __to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
                                          &hr_bytes, &hr_suffix);
                __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
                        hr_bytes, hr_bytes << 1,
                        hr_suffix, buckets[i], this_line);
        }

        kfree(buckets);
}

/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker - The tracking to pull data from.
 * @s - A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-NULL then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
                            struct seq_file *s)
{
        nvgpu_lock_tracker(tracker);

        __pstat(s, "Mem tracker: %s\n\n", tracker->name);

        __pstat(s, "Basic Stats:\n");
        __pstat(s, "  Number of allocs        %lld\n",
                tracker->nr_allocs);
        __pstat(s, "  Number of frees         %lld\n",
                tracker->nr_frees);
        print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
        print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
        print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
        print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
        print_hr_bytes(s, "  Bytes allocated (real)  ",
                       tracker->bytes_alloced_real);
        print_hr_bytes(s, "  Bytes freed (real)      ",
                       tracker->bytes_freed_real);
        __pstat(s, "\n");

        print_histogram(tracker, s);

        nvgpu_unlock_tracker(tracker);
}

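/*
 * seq_file "show" callback: the tracker pointer was stashed in
 * inode->i_private by debugfs_create_file() and handed to us by
 * single_open() below.
 */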
static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
        struct nvgpu_mem_alloc_tracker *tracker = s->private;

        nvgpu_kmem_print_stats(tracker, s);

        return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
        return single_open(file, __kmem_tracking_show, inode->i_private);
}

static const struct file_operations __kmem_tracking_fops = {
        .open = __kmem_tracking_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

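/*
 * Walk the tracker's rbtree of outstanding allocations and print each
 * one via kmem_print_mem_alloc().
 */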
static int __kmem_traces_dump_tracker(struct gk20a *g,
                                      struct nvgpu_mem_alloc_tracker *tracker,
                                      struct seq_file *s)
{
        struct nvgpu_rbtree_node *node;

        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
        while (node) {
                struct nvgpu_mem_alloc *alloc =
                        nvgpu_mem_alloc_from_rbtree_node(node);

                kmem_print_mem_alloc(g, alloc, s);

                nvgpu_rbtree_enum_next(&node, node);
        }

        return 0;
}

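/*
 * seq_file "show" callback for the "traces" file: dump the outstanding
 * vmalloc and kmalloc allocations, each under its tracker's lock.
 */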
static int __kmem_traces_show(struct seq_file *s, void *unused)
{
        struct gk20a *g = s->private;

        nvgpu_lock_tracker(g->vmallocs);
        seq_puts(s, "Outstanding vmallocs:\n");
        __kmem_traces_dump_tracker(g, g->vmallocs, s);
        seq_puts(s, "\n");
        nvgpu_unlock_tracker(g->vmallocs);

        nvgpu_lock_tracker(g->kmallocs);
        seq_puts(s, "Outstanding kmallocs:\n");
        __kmem_traces_dump_tracker(g, g->kmallocs, s);
        nvgpu_unlock_tracker(g->kmallocs);

        return 0;
}

static int __kmem_traces_open(struct inode *inode, struct file *file)
{
        return single_open(file, __kmem_traces_show, inode->i_private);
}

static const struct file_operations __kmem_traces_fops = {
        .open = __kmem_traces_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

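/*
 * Create the "kmem_tracking" debugfs directory with one file per
 * tracker (vmallocs, kmallocs) plus a combined "traces" file.
 */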
void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
        struct gk20a_platform *platform = dev_get_drvdata(g->dev);
        struct dentry *node;

        g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
        if (IS_ERR_OR_NULL(g->debugfs_kmem))
                return;

        node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
                                   g->debugfs_kmem,
                                   g->vmallocs, &__kmem_tracking_fops);
        node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
                                   g->debugfs_kmem,
                                   g->kmallocs, &__kmem_tracking_fops);
        node = debugfs_create_file("traces", S_IRUGO,
                                   g->debugfs_kmem,
                                   g, &__kmem_traces_fops);
}
#endif