summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug.c376
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.c80
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.c51
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.c30
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.c369
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.h22
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.c31
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.c315
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.h23
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.c26
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.c479
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.c79
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/driver_common.c3
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.c2
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem.c323
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem_priv.h8
-rw-r--r--drivers/gpu/nvgpu/common/linux/module.c6
-rw-r--r--drivers/gpu/nvgpu/common/mm/bitmap_allocator.c2
-rw-r--r--drivers/gpu/nvgpu/common/mm/buddy_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/lockless_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c70
-rw-r--r--drivers/gpu/nvgpu/common/mm/page_allocator.c2
29 files changed, 2064 insertions, 388 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c
new file mode 100644
index 00000000..2962a467
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug.c
@@ -0,0 +1,376 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "debug_ce.h"
17#include "debug_fifo.h"
18#include "debug_gr.h"
19#include "debug_mm.h"
20#include "debug_allocator.h"
21#include "debug_kmem.h"
22#include "debug_pmu.h"
23#include "debug_sched.h"
24
25#include "gk20a/gk20a.h"
26#include "gk20a/platform_gk20a.h"
27
28#include <linux/debugfs.h>
29#include <linux/seq_file.h>
30
31#include <nvgpu/debug.h>
32
33unsigned int gk20a_debug_trace_cmdbuf;
34
35static inline void gk20a_debug_write_printk(void *ctx, const char *str,
36 size_t len)
37{
38 pr_info("%s", str);
39}
40
41static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
42 size_t len)
43{
44 seq_write((struct seq_file *)ctx, str, len);
45}
46
47void gk20a_debug_output(struct gk20a_debug_output *o,
48 const char *fmt, ...)
49{
50 va_list args;
51 int len;
52
53 va_start(args, fmt);
54 len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
55 va_end(args);
56 o->fn(o->ctx, o->buf, len);
57}
58
/*
 * Dump GR (graphics) registers via the chip HAL, wrapped in an
 * ELPG-protected call so engine power-gating cannot race the register
 * reads.  Silently a no-op when the HAL hook is not populated.
 */
static int gk20a_gr_dump_regs(struct gk20a *g,
		struct gk20a_debug_output *o)
{
	if (g->ops.gr.dump_gr_regs)
		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));

	return 0;
}
67
68int gk20a_gr_debug_dump(struct gk20a *g)
69{
70 struct gk20a_debug_output o = {
71 .fn = gk20a_debug_write_printk
72 };
73
74 gk20a_gr_dump_regs(g, &o);
75
76 return 0;
77}
78
79static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
80{
81 struct device *dev = s->private;
82 struct gk20a *g = gk20a_get_platform(dev)->g;
83 struct gk20a_debug_output o = {
84 .fn = gk20a_debug_write_to_seqfile,
85 .ctx = s,
86 };
87 int err;
88
89 err = gk20a_busy(g);
90 if (err) {
91 nvgpu_err(g, "failed to power on gpu: %d", err);
92 return -EINVAL;
93 }
94
95 gk20a_gr_dump_regs(g, &o);
96
97 gk20a_idle(g);
98
99 return 0;
100}
101
102void gk20a_debug_dump(struct gk20a *g)
103{
104 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
105 struct gk20a_debug_output o = {
106 .fn = gk20a_debug_write_printk
107 };
108
109 if (platform->dump_platform_dependencies)
110 platform->dump_platform_dependencies(g->dev);
111
112 /* HAL only initialized after 1st power-on */
113 if (g->ops.debug.show_dump)
114 g->ops.debug.show_dump(g, &o);
115}
116
117static int gk20a_debug_show(struct seq_file *s, void *unused)
118{
119 struct device *dev = s->private;
120 struct gk20a_debug_output o = {
121 .fn = gk20a_debug_write_to_seqfile,
122 .ctx = s,
123 };
124 struct gk20a *g;
125 int err;
126
127 g = gk20a_get_platform(dev)->g;
128
129 err = gk20a_busy(g);
130 if (err) {
131 nvgpu_err(g, "failed to power on gpu: %d", err);
132 return -EFAULT;
133 }
134
135 /* HAL only initialized after 1st power-on */
136 if (g->ops.debug.show_dump)
137 g->ops.debug.show_dump(g, &o);
138
139 gk20a_idle(g);
140 return 0;
141}
142
/* debugfs open: bind gk20a_gr_debug_show to this seq_file. */
static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_gr_debug_show, inode->i_private);
}

/* debugfs open: bind gk20a_debug_show to this seq_file. */
static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}

/* File operations for the "gr_status" debugfs node. */
static const struct file_operations gk20a_gr_debug_fops = {
	.open = gk20a_gr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* File operations for the "status" debugfs node. */
static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
166
/*
 * Default HAL implementation of debug.show_dump: dump PBDMA and engine
 * status registers, then the RAMFC state of every channel.
 */
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
	g->ops.fifo.dump_pbdma_status(g, o);
	g->ops.fifo.dump_eng_status(g, o);

	gk20a_debug_dump_all_channel_status_ramfc(g, o);
}

/* Install the default debug HAL hooks for gk20a-class chips. */
void gk20a_init_debug_ops(struct gpu_ops *gops)
{
	gops->debug.show_dump = gk20a_debug_show_dump;
}
179
180static int railgate_residency_show(struct seq_file *s, void *data)
181{
182 struct gk20a *g = s->private;
183 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
184 unsigned long time_since_last_state_transition_ms;
185 unsigned long total_rail_gate_time_ms;
186 unsigned long total_rail_ungate_time_ms;
187
188 if (platform->is_railgated(g->dev)) {
189 time_since_last_state_transition_ms =
190 jiffies_to_msecs(jiffies -
191 g->pstats.last_rail_gate_complete);
192 total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
193 total_rail_gate_time_ms =
194 g->pstats.total_rail_gate_time_ms +
195 time_since_last_state_transition_ms;
196 } else {
197 time_since_last_state_transition_ms =
198 jiffies_to_msecs(jiffies -
199 g->pstats.last_rail_ungate_complete);
200 total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
201 total_rail_ungate_time_ms =
202 g->pstats.total_rail_ungate_time_ms +
203 time_since_last_state_transition_ms;
204 }
205
206 seq_printf(s, "Time with Rails Gated: %lu ms\n"
207 "Time with Rails UnGated: %lu ms\n"
208 "Total railgating cycles: %lu\n",
209 total_rail_gate_time_ms,
210 total_rail_ungate_time_ms,
211 g->pstats.railgating_cycle_count - 1);
212 return 0;
213
214}
215
/* debugfs open: bind railgate_residency_show to this seq_file. */
static int railgate_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, railgate_residency_show, inode->i_private);
}

/* File operations for the "railgate_residency" debugfs node. */
static const struct file_operations railgate_residency_fops = {
	.open = railgate_residency_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
227
228static int gk20a_railgating_debugfs_init(struct gk20a *g)
229{
230 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
231 struct dentry *d;
232
233 if (!g->can_railgate)
234 return 0;
235
236 d = debugfs_create_file(
237 "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
238 &railgate_residency_fops);
239 if (!d)
240 return -ENOMEM;
241
242 return 0;
243}
244
/*
 * Create the per-GPU debugfs directory (named after the device) and
 * populate it with the driver-wide knobs, then call each subsystem's
 * debugfs init.  @debugfs_symlink, when non-NULL, is the name of a
 * top-level symlink aliasing the directory.  Returns silently if the
 * directory cannot be created; individual node failures are ignored.
 */
void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
	struct device *dev = g->dev;
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
	if (!platform->debugfs)
		return;

	if (debugfs_symlink)
		platform->debugfs_alias =
			debugfs_create_symlink(debugfs_symlink,
					NULL, dev_name(dev));

	debugfs_create_file("status", S_IRUGO, platform->debugfs,
		dev, &gk20a_debug_fops);
	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
		dev, &gk20a_gr_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
		platform->debugfs, &gk20a_debug_trace_cmdbuf);

	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->ch_wdt_timeout_ms);

	debugfs_create_u32("disable_syncpoints", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->disable_syncpoints);

	/* Legacy debugging API. */
	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
		platform->debugfs, &nvgpu_dbg_mask);

	/* New debug logging API. */
	debugfs_create_u32("log_mask", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->log_mask);
	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->log_trace);

	nvgpu_spinlock_init(&g->debugfs_lock);

	/* Defaults for the knobs exposed below. */
	g->mm.ltc_enabled = true;
	g->mm.ltc_enabled_debug = true;

	g->debugfs_ltc_enabled =
		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
				 platform->debugfs,
				 &g->mm.ltc_enabled_debug);

	g->debugfs_gr_idle_timeout_default =
		debugfs_create_u32("gr_idle_timeout_default_us",
				S_IRUGO|S_IWUSR, platform->debugfs,
				&g->gr_idle_timeout_default);
	g->debugfs_timeouts_enabled =
		debugfs_create_bool("timeouts_enabled",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeouts_enabled);

	g->debugfs_bypass_smmu =
		debugfs_create_bool("bypass_smmu",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.bypass_smmu);
	g->debugfs_disable_bigpage =
		debugfs_create_bool("disable_bigpage",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.disable_bigpage);

	g->debugfs_timeslice_low_priority_us =
		debugfs_create_u32("timeslice_low_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_low_priority_us);
	g->debugfs_timeslice_medium_priority_us =
		debugfs_create_u32("timeslice_medium_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_medium_priority_us);
	g->debugfs_timeslice_high_priority_us =
		debugfs_create_u32("timeslice_high_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_high_priority_us);
	g->debugfs_runlist_interleave =
		debugfs_create_bool("runlist_interleave",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->runlist_interleave);
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	/* t18x-only preemption debug knobs. */
	g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->gr.t18x.ctx_vars.force_preemption_gfxp);

	g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->gr.t18x.ctx_vars.force_preemption_cilp);

	g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
				S_IRUGO|S_IWUSR, platform->debugfs,
				&g->gr.t18x.
				ctx_vars.dump_ctxsw_stats_on_channel_close);
#endif

	/* Per-subsystem debugfs trees under the same directory. */
	gr_gk20a_debugfs_init(g);
	gk20a_pmu_debugfs_init(g);
	gk20a_railgating_debugfs_init(g);
	gk20a_cde_debugfs_init(g);
	gk20a_ce_debugfs_init(g);
	nvgpu_alloc_debugfs_init(g);
	gk20a_mm_debugfs_init(g);
	gk20a_fifo_debugfs_init(g);
	gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	nvgpu_kmem_debugfs_init(g);
#endif
}
364
365void gk20a_debug_deinit(struct gk20a *g)
366{
367 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
368
369 if (!platform->debugfs)
370 return;
371
372 gk20a_fifo_debugfs_deinit(g);
373
374 debugfs_remove_recursive(platform->debugfs);
375 debugfs_remove_recursive(platform->debugfs_alias);
376}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
new file mode 100644
index 00000000..3d4a2bb2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_allocator.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/allocator.h>
22
23u32 nvgpu_alloc_tracing_on;
24
/*
 * Dispatch to the allocator backend's stats printer.  @lock tells the
 * backend whether it must take its own lock while walking internal
 * state.
 */
void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
			 struct seq_file *s, int lock)
{
	__a->ops->print_stats(__a, s, lock);
}
30
/* seq_file show handler: print one allocator's stats (with locking). */
static int __alloc_show(struct seq_file *s, void *unused)
{
	struct nvgpu_allocator *a = s->private;

	nvgpu_alloc_print_stats(a, s, 1);

	return 0;
}

/* debugfs open: bind __alloc_show to this seq_file. */
static int __alloc_open(struct inode *inode, struct file *file)
{
	return single_open(file, __alloc_show, inode->i_private);
}

/* File operations for each per-allocator debugfs node. */
static const struct file_operations __alloc_fops = {
	.open = __alloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
51
52void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
53{
54 if (!g->debugfs_allocators)
55 return;
56
57 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
58 g->debugfs_allocators,
59 a, &__alloc_fops);
60}
61
62void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
63{
64 if (!IS_ERR_OR_NULL(a->debugfs_entry))
65 debugfs_remove(a->debugfs_entry);
66}
67
68void nvgpu_alloc_debugfs_init(struct gk20a *g)
69{
70 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
71
72 g->debugfs_allocators = debugfs_create_dir("allocators", platform->debugfs);
73 if (IS_ERR_OR_NULL(g->debugfs_allocators)) {
74 g->debugfs_allocators = NULL;
75 return;
76 }
77
78 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
79 &nvgpu_alloc_tracing_on);
80}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
new file mode 100644
index 00000000..1b21cfc5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the nvgpu allocators. */
#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__

struct gk20a;
void nvgpu_alloc_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
new file mode 100644
index 00000000..eb7c33e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20
/*
 * debugfs write handler for "reload_cde_firmware": any write (the
 * payload itself is ignored) triggers a CDE firmware reload.  Always
 * reports the full count as consumed.
 */
static ssize_t gk20a_cde_reload_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct gk20a *g = file->private_data;

	gk20a_cde_reload(g);
	return count;
}

/* Write-only node; simple_open stashes i_private in private_data. */
static const struct file_operations gk20a_cde_reload_fops = {
	.open = simple_open,
	.write = gk20a_cde_reload_write,
};
33
34void gk20a_cde_debugfs_init(struct gk20a *g)
35{
36 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
37
38 if (!platform->has_cde)
39 return;
40
41 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
42 platform->debugfs, &g->cde_app.shader_parameter);
43 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
44 platform->debugfs, &g->cde_app.ctx_count);
45 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
46 platform->debugfs, &g->cde_app.ctx_usecount);
47 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
48 platform->debugfs, &g->cde_app.ctx_count_top);
49 debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
50 g, &gk20a_cde_reload_fops);
51}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
new file mode 100644
index 00000000..4895edd6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the CDE (color decompression engine). */
#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__

struct gk20a;
void gk20a_cde_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
new file mode 100644
index 00000000..9c50870e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ce.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20void gk20a_ce_debugfs_init(struct gk20a *g)
21{
22 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
23
24 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
25 platform->debugfs, &g->ce_app.ctx_count);
26 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
27 platform->debugfs, &g->ce_app.app_state);
28 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
29 platform->debugfs, &g->ce_app.next_ctx_id);
30}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
new file mode 100644
index 00000000..2a8750c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the CE (copy engine) app. */
#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__

struct gk20a;
void gk20a_ce_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22
23void __gk20a_fifo_profile_free(struct kref *ref);
24
25static void *gk20a_fifo_sched_debugfs_seq_start(
26 struct seq_file *s, loff_t *pos)
27{
28 struct gk20a *g = s->private;
29 struct fifo_gk20a *f = &g->fifo;
30
31 if (*pos >= f->num_channels)
32 return NULL;
33
34 return &f->channel[*pos];
35}
36
37static void *gk20a_fifo_sched_debugfs_seq_next(
38 struct seq_file *s, void *v, loff_t *pos)
39{
40 struct gk20a *g = s->private;
41 struct fifo_gk20a *f = &g->fifo;
42
43 ++(*pos);
44 if (*pos >= f->num_channels)
45 return NULL;
46
47 return &f->channel[*pos];
48}
49
50static void gk20a_fifo_sched_debugfs_seq_stop(
51 struct seq_file *s, void *v)
52{
53}
54
/*
 * Emit one row of the "sched" debugfs table for channel @v, printing
 * the column header first when @v is the first channel in the array.
 * Channels not set in the GR runlist's active bitmap are skipped
 * (SEQ_SKIP), as are channels whose reference cannot be taken.
 */
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	/* Only the GR engine's runlist is inspected. */
	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->hw_chid, runlist->active_channels))
		return ret;

	if (gk20a_channel_get(ch)) {
		/* TSG-bound channels report the TSG's scheduling
		 * parameters; standalone channels report their own.
		 */
		if (gk20a_is_channel_marked_as_tsg(ch))
			tsg = &f->tsg[ch->tsgid];

		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->hw_chid,
				ch->tsgid,
				ch->tgid,
				tsg ? tsg->timeslice_us : ch->timeslice_us,
				ch->timeout_ms_max,
				tsg ? tsg->interleave_level : ch->interleave_level,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
		gk20a_channel_put(ch);
	}
	/* NOTE(review): returns 0 even when the channel ref could not
	 * be taken; returning 'ret' (SEQ_SKIP) looks intended — confirm.
	 */
	return 0;
}
100
/* Iterator callbacks for the "sched" seq_file. */
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};

/*
 * debugfs open for "sched": admin-only (exposes per-process channel
 * scheduling info).  Stashes i_private (the gk20a) as the seq_file's
 * private pointer for the iterator callbacks.
 */
static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
	struct file *file)
{
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
	if (err)
		return err;

	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);

	((struct seq_file *)file->private_data)->private = inode->i_private;
	return 0;
};

/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
137
/*
 * debugfs "enable" handler for FIFO kickoff profiling.  A non-zero
 * write allocates the sample buffers (if not already live) and turns
 * profiling on; zero turns it off and drops the enable reference so
 * the buffers are freed once no in-flight acquirer still holds one.
 * Returns -ENOMEM if the buffers cannot be allocated.
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;


	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* not kref init as it can have a running condition if
			 * we enable/disable/enable while kickoff is happening
			 */
			if (!kref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = vzalloc(
						FIFO_PROFILING_ENTRIES *
						sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = vzalloc(
						FIFO_PROFILING_ENTRIES *
						sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					/* Free whichever allocation succeeded
					 * (assumes nvgpu_vfree tolerates NULL,
					 * as vfree does — the original relies
					 * on this too).
					 */
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				kref_init(&f->profile.ref);
			}
			atomic_set(&f->profile.get, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}

/* Write-only attribute (no read callback), parsed as "%llu". */
DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);
185
/*
 * qsort-style comparator for 64-bit timestamp deltas.
 *
 * The original returned (int)(a - b); truncating a 64-bit unsigned
 * difference to int yields the wrong sign — or zero — whenever the
 * difference does not fit in 32 bits, so sort() could misorder the
 * samples and corrupt every percentile derived from them.  Compare
 * explicitly instead.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *) a;
	unsigned long long y = *(const unsigned long long *) b;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}
190
191/*
192 * This uses about 800b in the stack, but the function using it is not part
193 * of a callstack where much memory is being used, so it is fine
194 */
195#define PERCENTILE_WIDTH 5
196#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
197
198static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
199 u64 *percentiles, u32 index_end, u32 index_start)
200{
201 unsigned int nelem = 0;
202 unsigned int index;
203 struct fifo_profile_gk20a *profile;
204
205 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
206 profile = &g->fifo.profile.data[index];
207
208 if (profile->timestamp[index_end] >
209 profile->timestamp[index_start]) {
210 /* This is a valid element */
211 g->fifo.profile.sorted[nelem] =
212 profile->timestamp[index_end] -
213 profile->timestamp[index_start];
214 nelem++;
215 }
216 }
217
218 /* sort it */
219 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
220 __profile_cmp, NULL);
221
222 /* build ranges */
223 for (index = 0; index < PERCENTILE_RANGES; index++)
224 percentiles[index] =
225 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
226 nelem)/100 - 1];
227 return nelem;
228}
229
/*
 * debugfs "stats" show handler: compute and print percentile tables
 * for each phase of the kickoff path.  Takes a profile reference for
 * the duration so the buffers cannot be freed mid-read; if the
 * reference cannot be taken, profiling is off and we say so.
 */
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	unsigned int get, nelem, index;
	/*
	 * 800B in the stack, but function is declared statically and only
	 * called from debugfs handler
	 */
	u64 percentiles_ioctl[PERCENTILE_RANGES];
	u64 percentiles_kickoff[PERCENTILE_RANGES];
	u64 percentiles_jobtracking[PERCENTILE_RANGES];
	u64 percentiles_append[PERCENTILE_RANGES];
	u64 percentiles_userd[PERCENTILE_RANGES];

	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
		seq_printf(s, "Profiling disabled\n");
		return 0;
	}

	get = atomic_read(&g->fifo.profile.get);

	/* One stats pass per instrumented interval of the kickoff. */
	__gk20a_fifo_create_stats(g, percentiles_ioctl,
		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_kickoff,
		PROFILE_END, PROFILE_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_append,
		PROFILE_APPEND, PROFILE_JOB_TRACKING);
	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
		PROFILE_END, PROFILE_APPEND);

	seq_printf(s, "Number of kickoffs: %d\n", nelem);
	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

	for (index = 0; index < PERCENTILE_RANGES; index++)
		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
			PERCENTILE_WIDTH * (index+1),
			percentiles_ioctl[index],
			percentiles_kickoff[index],
			percentiles_append[index],
			percentiles_jobtracking[index],
			percentiles_userd[index]);

	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

	return 0;
}

/* debugfs open: bind gk20a_fifo_profile_stats to this seq_file. */
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

/* File operations for the profile "stats" debugfs node. */
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
290
291
/*
 * Create the fifo/ debugfs subtree: the "sched" channel table and the
 * fifo/profile/ controls.  Profiling state starts disabled with a
 * zero kref so kref_get_unless_zero() fails until "enable" is written.
 */
void gk20a_fifo_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	struct dentry *gpu_root = platform->debugfs;
	struct dentry *fifo_root;
	struct dentry *profile_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	gk20a_dbg(gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);

	profile_root = debugfs_create_dir("profile", fifo_root);
	if (IS_ERR_OR_NULL(profile_root))
		return;

	nvgpu_mutex_init(&g->fifo.profile.lock);
	g->fifo.profile.enabled = false;
	atomic_set(&g->fifo.profile.get, 0);
	/* Deliberately pokes the kref internals: refcount 0 marks
	 * "profiling disabled" for kref_get_unless_zero() users.
	 */
	atomic_set(&g->fifo.profile.ref.refcount, 0);

	debugfs_create_file("enable", 0600, profile_root, g,
		&gk20a_fifo_profile_enable_debugfs_fops);

	debugfs_create_file("stats", 0600, profile_root, g,
		&gk20a_fifo_profile_stats_debugfs_fops);

}
325
/*
 * kref release callback for the profiling buffers: frees the raw
 * sample ring and the sorting scratch array.  Runs when the last
 * reference (enable flag or in-flight acquire) is dropped.
 */
void __gk20a_fifo_profile_free(struct kref *ref)
{
	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
						profile.ref);
	nvgpu_vfree(f->g, f->profile.data);
	nvgpu_vfree(f->g, f->profile.sorted);
}
333
/* Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_profile_gk20a *profile;
	unsigned int index;

	/* If kref is zero, profiling is not enabled */
	if (!kref_get_unless_zero(&f->profile.ref))
		return NULL;
	/* The counter only ever grows; the ring slot is its value
	 * modulo the ring size, so concurrent acquirers get distinct
	 * slots (until the atomic wraps).
	 */
	index = atomic_inc_return(&f->profile.get);
	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

	return profile;
}

/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
	struct fifo_profile_gk20a *profile)
{
	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}
358
359void gk20a_fifo_debugfs_deinit(struct gk20a *g)
360{
361 struct fifo_gk20a *f = &g->fifo;
362
363 nvgpu_mutex_acquire(&f->profile.lock);
364 if (f->profile.enabled) {
365 f->profile.enabled = false;
366 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
367 }
368 nvgpu_mutex_release(&f->profile.lock);
369}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
new file mode 100644
index 00000000..46ac853e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_FIFO_H__
16#define __NVGPU_DEBUG_FIFO_H__
17
18struct gk20a;
19void gk20a_fifo_debugfs_init(struct gk20a *g);
20void gk20a_fifo_debugfs_deinit(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
new file mode 100644
index 00000000..56b8612e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_gr.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20int gr_gk20a_debugfs_init(struct gk20a *g)
21{
22 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
23
24 g->debugfs_gr_default_attrib_cb_size =
25 debugfs_create_u32("gr_default_attrib_cb_size",
26 S_IRUGO|S_IWUSR, platform->debugfs,
27 &g->gr.attrib_cb_default_size);
28
29 return 0;
30}
31
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h
new file mode 100644
index 00000000..4b46acbb
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_GR_H__
16#define __NVGPU_DEBUG_GR_H__
17
18struct gk20a;
19int gr_gk20a_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_kmem.h"
16#include "kmem_priv.h"
17#include "gk20a/platform_gk20a.h"
18
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21
22#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
23/**
24 * to_human_readable_bytes - Determine suffix for passed size.
25 *
26 * @bytes - Number of bytes to generate a suffix for.
27 * @hr_bytes [out] - The human readable number of bytes.
28 * @hr_suffix [out] - The suffix for the HR number of bytes.
29 *
30 * Computes a human readable decomposition of the passed number of bytes. The
31 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
32 * number of bytes is then passed back in @hr_bytes. This returns the following
33 * ranges:
34 *
35 * 0 - 1023 B
36 * 1 - 1023 KB
37 * 1 - 1023 MB
38 * 1 - 1023 GB
39 * 1 - 1023 TB
40 * 1 - ... PB
41 */
42static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
43 const char **hr_suffix)
44{
45 static const char *suffixes[] =
46 { "B", "KB", "MB", "GB", "TB", "PB" };
47
48 u64 suffix_ind = 0;
49
50 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
51 bytes >>= 10;
52 suffix_ind++;
53 }
54
55 /*
56 * Handle case where bytes > 1023PB.
57 */
58 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
59 suffix_ind : ARRAY_SIZE(suffixes) - 1;
60
61 *hr_bytes = bytes;
62 *hr_suffix = suffixes[suffix_ind];
63}
64
65/**
66 * print_hr_bytes - Print human readable bytes
67 *
68 * @s - A seq_file to print to. May be NULL.
69 * @msg - A message to print before the bytes.
70 * @bytes - Number of bytes.
71 *
72 * Print @msg followed by the human readable decomposition of the passed number
73 * of bytes.
74 *
75 * If @s is NULL then this prints will be made to the kernel log.
76 */
77static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
78{
79 u64 hr_bytes;
80 const char *hr_suffix;
81
82 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
83 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
84}
85
86/**
87 * print_histogram - Build a histogram of the memory usage.
88 *
89 * @tracker The tracking to pull data from.
90 * @s A seq_file to dump info into.
91 */
92static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
93 struct seq_file *s)
94{
95 int i;
96 u64 pot_min, pot_max;
97 u64 nr_buckets;
98 unsigned int *buckets;
99 unsigned int total_allocs;
100 struct nvgpu_rbtree_node *node;
101 static const char histogram_line[] =
102 "++++++++++++++++++++++++++++++++++++++++";
103
104 /*
105 * pot_min is essentially a round down to the nearest power of 2. This
106 * is the start of the histogram. pot_max is just a round up to the
107 * nearest power of two. Each histogram bucket is one power of two so
108 * the histogram buckets are exponential.
109 */
110 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
111 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
112
113 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
114
115 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
116 if (!buckets) {
117 __pstat(s, "OOM: could not allocate bucket storage!?\n");
118 return;
119 }
120
121 /*
122 * Iterate across all of the allocs and determine what bucket they
123 * should go in. Round the size down to the nearest power of two to
124 * find the right bucket.
125 */
126 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
127 while (node) {
128 int b;
129 u64 bucket_min;
130 struct nvgpu_mem_alloc *alloc =
131 nvgpu_mem_alloc_from_rbtree_node(node);
132
133 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
134 if (bucket_min < tracker->min_alloc)
135 bucket_min = tracker->min_alloc;
136
137 b = __ffs(bucket_min) - __ffs(pot_min);
138
139 /*
140 * Handle the one case were there's an alloc exactly as big as
141 * the maximum bucket size of the largest bucket. Most of the
142 * buckets have an inclusive minimum and exclusive maximum. But
143 * the largest bucket needs to have an _inclusive_ maximum as
144 * well.
145 */
146 if (b == (int)nr_buckets)
147 b--;
148
149 buckets[b]++;
150
151 nvgpu_rbtree_enum_next(&node, node);
152 }
153
154 total_allocs = 0;
155 for (i = 0; i < (int)nr_buckets; i++)
156 total_allocs += buckets[i];
157
158 __pstat(s, "Alloc histogram:\n");
159
160 /*
161 * Actually compute the histogram lines.
162 */
163 for (i = 0; i < (int)nr_buckets; i++) {
164 char this_line[sizeof(histogram_line) + 1];
165 u64 line_length;
166 u64 hr_bytes;
167 const char *hr_suffix;
168
169 memset(this_line, 0, sizeof(this_line));
170
171 /*
172 * Compute the normalized line length. Cant use floating point
173 * so we will just multiply everything by 1000 and use fixed
174 * point.
175 */
176 line_length = (1000 * buckets[i]) / total_allocs;
177 line_length *= sizeof(histogram_line);
178 line_length /= 1000;
179
180 memset(this_line, '+', line_length);
181
182 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
183 &hr_bytes, &hr_suffix);
184 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
185 hr_bytes, hr_bytes << 1,
186 hr_suffix, buckets[i], this_line);
187 }
188}
189
/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed (via __pstat()).
 * Takes and releases the tracker lock around the whole dump so counters
 * and the histogram come from one consistent snapshot.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}
225
226static int __kmem_tracking_show(struct seq_file *s, void *unused)
227{
228 struct nvgpu_mem_alloc_tracker *tracker = s->private;
229
230 nvgpu_kmem_print_stats(tracker, s);
231
232 return 0;
233}
234
/* Bind __kmem_tracking_show to the seq_file; inode->i_private carries the
 * tracker pointer passed at file creation time. */
static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}
239
/* Read-only seq_file operations for the per-tracker stats nodes. */
static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
246
247static int __kmem_traces_dump_tracker(struct gk20a *g,
248 struct nvgpu_mem_alloc_tracker *tracker,
249 struct seq_file *s)
250{
251 struct nvgpu_rbtree_node *node;
252
253 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
254 while (node) {
255 struct nvgpu_mem_alloc *alloc =
256 nvgpu_mem_alloc_from_rbtree_node(node);
257
258 kmem_print_mem_alloc(g, alloc, s);
259
260 nvgpu_rbtree_enum_next(&node, node);
261 }
262
263 return 0;
264}
265
266static int __kmem_traces_show(struct seq_file *s, void *unused)
267{
268 struct gk20a *g = s->private;
269
270 nvgpu_lock_tracker(g->vmallocs);
271 seq_puts(s, "Oustanding vmallocs:\n");
272 __kmem_traces_dump_tracker(g, g->vmallocs, s);
273 seq_puts(s, "\n");
274 nvgpu_unlock_tracker(g->vmallocs);
275
276 nvgpu_lock_tracker(g->kmallocs);
277 seq_puts(s, "Oustanding kmallocs:\n");
278 __kmem_traces_dump_tracker(g, g->kmallocs, s);
279 nvgpu_unlock_tracker(g->kmallocs);
280
281 return 0;
282}
283
/* Bind __kmem_traces_show to the seq_file; inode->i_private is the gk20a
 * device pointer. */
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}
288
/* Read-only seq_file operations for the "traces" node. */
static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
295
296void nvgpu_kmem_debugfs_init(struct gk20a *g)
297{
298 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
299 struct dentry *node;
300
301 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
302 if (IS_ERR_OR_NULL(g->debugfs_kmem))
303 return;
304
305 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
306 g->debugfs_kmem,
307 g->vmallocs, &__kmem_tracking_fops);
308 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
309 g->debugfs_kmem,
310 g->kmallocs, &__kmem_tracking_fops);
311 node = debugfs_create_file("traces", S_IRUGO,
312 g->debugfs_kmem,
313 g, &__kmem_traces_fops);
314}
315#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_KMEM_H__
16#define __NVGPU_DEBUG_KMEM_H__
17
18struct gk20a;
19#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
20void nvgpu_kmem_debugfs_init(struct gk20a *g);
21#endif
22
23#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 00000000..1e260f89
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_mm.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
/* Create the MM debugfs node: a read/write bool forcing PRAMIN access
 * for vidmem (g->mm.force_pramin). */
void gk20a_mm_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	debugfs_create_bool("force_pramin", 0664, platform->debugfs,
			    &g->mm.force_pramin);
}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 00000000..bf7bc985
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_MM_H__
16#define __NVGPU_DEBUG_MM_H__
17
18struct gk20a;
19void gk20a_mm_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 00000000..f19f5139
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_pmu.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20#include <linux/uaccess.h>
21
22static int lpwr_debug_show(struct seq_file *s, void *data)
23{
24 struct gk20a *g = s->private;
25
26 if (g->ops.pmu.pmu_pg_engines_feature_list &&
27 g->ops.pmu.pmu_pg_engines_feature_list(g,
28 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
29 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
30 seq_printf(s, "PSTATE: %u\n"
31 "RPPG Enabled: %u\n"
32 "RPPG ref count: %u\n"
33 "RPPG state: %u\n"
34 "MSCG Enabled: %u\n"
35 "MSCG pstate state: %u\n"
36 "MSCG transition state: %u\n",
37 g->ops.clk_arb.get_current_pstate(g),
38 g->elpg_enabled, g->pmu.elpg_refcnt,
39 g->pmu.elpg_stat, g->mscg_enabled,
40 g->pmu.mscg_stat, g->pmu.mscg_transition_state);
41
42 } else
43 seq_printf(s, "ELPG Enabled: %u\n"
44 "ELPG ref count: %u\n"
45 "ELPG state: %u\n",
46 g->elpg_enabled, g->pmu.elpg_refcnt,
47 g->pmu.elpg_stat);
48
49 return 0;
50
51}
52
/* Bind lpwr_debug_show to the seq_file; i_private is the gk20a pointer. */
static int lpwr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, lpwr_debug_show, inode->i_private);
}
57
/* Read-only seq_file operations for the "lpwr_debug" node. */
static const struct file_operations lpwr_debug_fops = {
	.open = lpwr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
64
65static int mscg_stat_show(struct seq_file *s, void *data)
66{
67 struct gk20a *g = s->private;
68 u64 total_ingating, total_ungating, residency, divisor, dividend;
69 struct pmu_pg_stats_data pg_stat_data = { 0 };
70 int err;
71
72 /* Don't unnecessarily power on the device */
73 if (g->power_on) {
74 err = gk20a_busy(g);
75 if (err)
76 return err;
77
78 gk20a_pmu_get_pg_stats(g,
79 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
80 gk20a_idle(g);
81 }
82 total_ingating = g->pg_ingating_time_us +
83 (u64)pg_stat_data.ingating_time;
84 total_ungating = g->pg_ungating_time_us +
85 (u64)pg_stat_data.ungating_time;
86
87 divisor = total_ingating + total_ungating;
88
89 /* We compute the residency on a scale of 1000 */
90 dividend = total_ingating * 1000;
91
92 if (divisor)
93 residency = div64_u64(dividend, divisor);
94 else
95 residency = 0;
96
97 seq_printf(s,
98 "Time in MSCG: %llu us\n"
99 "Time out of MSCG: %llu us\n"
100 "MSCG residency ratio: %llu\n"
101 "MSCG Entry Count: %u\n"
102 "MSCG Avg Entry latency %u\n"
103 "MSCG Avg Exit latency %u\n",
104 total_ingating, total_ungating,
105 residency, pg_stat_data.gating_cnt,
106 pg_stat_data.avg_entry_latency_us,
107 pg_stat_data.avg_exit_latency_us);
108 return 0;
109
110}
111
/* Bind mscg_stat_show to the seq_file; i_private is the gk20a pointer. */
static int mscg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_stat_show, inode->i_private);
}
116
/* Read-only seq_file operations for the "mscg_residency" node. */
static const struct file_operations mscg_stat_fops = {
	.open = mscg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
123
124static int mscg_transitions_show(struct seq_file *s, void *data)
125{
126 struct gk20a *g = s->private;
127 struct pmu_pg_stats_data pg_stat_data = { 0 };
128 u32 total_gating_cnt;
129 int err;
130
131 if (g->power_on) {
132 err = gk20a_busy(g);
133 if (err)
134 return err;
135
136 gk20a_pmu_get_pg_stats(g,
137 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
138 gk20a_idle(g);
139 }
140 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
141
142 seq_printf(s, "%u\n", total_gating_cnt);
143 return 0;
144
145}
146
/* Bind mscg_transitions_show to the seq_file; i_private is the gk20a
 * pointer. */
static int mscg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_transitions_show, inode->i_private);
}
151
/* Read-only seq_file operations for the "mscg_transitions" node. */
static const struct file_operations mscg_transitions_fops = {
	.open = mscg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
158
/*
 * Dump ELPG residency statistics for the graphics engine. Mirrors
 * mscg_stat_show() but queries PMU_PG_ELPG_ENGINE_ID_GRAPHICS.
 * Live PMU counters are fetched only when the GPU is already powered.
 */
static int elpg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		gk20a_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	/* Guarded: divisor is zero until any gating has been recorded. */
	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in ELPG: %llu us\n"
		"Time out of ELPG: %llu us\n"
		"ELPG residency ratio: %llu\n"
		"ELPG Entry Count: %u\n"
		"ELPG Avg Entry latency %u us\n"
		"ELPG Avg Exit latency %u us\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;
}
204
/* Bind elpg_stat_show to the seq_file; i_private is the gk20a pointer. */
static int elpg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_stat_show, inode->i_private);
}
209
/* Read-only seq_file operations for the "elpg_residency" node. */
static const struct file_operations elpg_stat_fops = {
	.open = elpg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
216
217static int elpg_transitions_show(struct seq_file *s, void *data)
218{
219 struct gk20a *g = s->private;
220 struct pmu_pg_stats_data pg_stat_data = { 0 };
221 u32 total_gating_cnt;
222 int err;
223
224 if (g->power_on) {
225 err = gk20a_busy(g);
226 if (err)
227 return err;
228
229 gk20a_pmu_get_pg_stats(g,
230 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
231 gk20a_idle(g);
232 }
233 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
234
235 seq_printf(s, "%u\n", total_gating_cnt);
236 return 0;
237
238}
239
/* Bind elpg_transitions_show to the seq_file; i_private is the gk20a
 * pointer. */
static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}
244
/* Read-only seq_file operations for the "elpg_transitions" node. */
static const struct file_operations elpg_transitions_fops = {
	.open = elpg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
251
/*
 * Dump the PMU falcon trace buffer. The buffer is copied into system
 * memory, then scanned in 0x40-byte records; each record's message text
 * (starting at byte offset 20) is printed with embedded hex placeholders
 * substituted from the record's u32 words.
 */
static int falc_trace_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_gk20a *pmu = &g->pmu;
	u32 i = 0, j = 0, k, l, m;
	char part_str[40];
	void *tracebuffer;
	char *trace;
	u32 *trace1;

	/* allocate system memory to copy pmu trace buffer */
	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
	if (tracebuffer == NULL)
		return -ENOMEM;

	/* read pmu traces into system memory buffer */
	nvgpu_mem_rd_n(g, &pmu->trace_buf,
		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);

	trace = (char *)tracebuffer;
	trace1 = (u32 *)tracebuffer;

	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
		/*
		 * Stop at the first fully-zero window.
		 * NOTE(review): this scans 0x40 *words* (256 bytes) per
		 * 0x40-*byte* record, which overlaps following records and
		 * reads past the end of the buffer on the final records —
		 * confirm intended bounds.
		 */
		for (j = 0; j < 0x40; j++)
			if (trace1[(i / 4) + j])
				break;
		if (j == 0x40)
			break;
		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
		l = 0;
		m = 0;
		/* Substitute each "0x" placeholder in the message text with
		 * the next trailing u32 from the record. k is the offset of
		 * the placeholder within the remaining text. */
		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
			if (k >= 40)
				break;
			strncpy(part_str, (trace+i+20+m), k);
			part_str[k] = 0;
			seq_printf(s, "%s0x%x", part_str,
					trace1[(i / 4) + 1 + l]);
			l++;
			m += k + 2;
		}
		seq_printf(s, "%s", (trace+i+20+m));
	}

	nvgpu_kfree(g, tracebuffer);
	return 0;
}
299
/* Bind falc_trace_show to the seq_file; i_private is the gk20a pointer. */
static int falc_trace_open(struct inode *inode, struct file *file)
{
	return single_open(file, falc_trace_show, inode->i_private);
}
304
/* Read-only seq_file operations for the "falc_trace" node. */
static const struct file_operations falc_trace_fops = {
	.open = falc_trace_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
311
312static int perfmon_events_enable_show(struct seq_file *s, void *data)
313{
314 struct gk20a *g = s->private;
315
316 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
317 return 0;
318
319}
320
/* Bind perfmon_events_enable_show to the seq_file; i_private is the gk20a
 * pointer. */
static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_enable_show, inode->i_private);
}
325
/*
 * Write handler toggling PMU perfmon sampling. Accepts a decimal 0/1
 * from userspace. When the GPU is powered on, sampling is started or
 * stopped immediately on the PMU; otherwise only the flag is recorded
 * and takes effect on the next power-up.
 */
static ssize_t perfmon_events_enable_write(struct file *file,
		const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;
	int err;

	/* Bounded copy; buf stays NUL-terminated via the memset. */
	memset(buf, 0, sizeof(buf));
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		/* Only transition when the state actually changes. */
		if (val && !g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = true;
			nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = false;
			nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}
365
/* seq_file operations for "perfmon_events_enable"; supports read and
 * write (see perfmon_events_enable_write). */
static const struct file_operations perfmon_events_enable_fops = {
	.open = perfmon_events_enable_open,
	.read = seq_read,
	.write = perfmon_events_enable_write,
	.llseek = seq_lseek,
	.release = single_release,
};
373
/* Print the running count of PMU perfmon events. */
static int perfmon_events_count_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
	return 0;
}
382
/* Bind perfmon_events_count_show to the seq_file; i_private is the gk20a
 * pointer. */
static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_count_show, inode->i_private);
}
387
/* Read-only seq_file operations for the "perfmon_events_count" node. */
static const struct file_operations perfmon_events_count_fops = {
	.open = perfmon_events_count_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
394
395static int security_show(struct seq_file *s, void *data)
396{
397 struct gk20a *g = s->private;
398
399 seq_printf(s, "%d\n", g->pmu.pmu_mode);
400 return 0;
401
402}
403
/* Bind security_show to the seq_file; i_private is the gk20a pointer. */
static int security_open(struct inode *inode, struct file *file)
{
	return single_open(file, security_show, inode->i_private);
}
408
/* Read-only seq_file operations for the "pmu_security" node. */
static const struct file_operations security_fops = {
	.open = security_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
415
416int gk20a_pmu_debugfs_init(struct gk20a *g)
417{
418 struct dentry *d;
419 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
420
421 d = debugfs_create_file(
422 "lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
423 &lpwr_debug_fops);
424 if (!d)
425 goto err_out;
426
427 d = debugfs_create_file(
428 "mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
429 &mscg_stat_fops);
430 if (!d)
431 goto err_out;
432
433 d = debugfs_create_file(
434 "mscg_transitions", S_IRUGO, platform->debugfs, g,
435 &mscg_transitions_fops);
436 if (!d)
437 goto err_out;
438
439 d = debugfs_create_file(
440 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
441 &elpg_stat_fops);
442 if (!d)
443 goto err_out;
444
445 d = debugfs_create_file(
446 "elpg_transitions", S_IRUGO, platform->debugfs, g,
447 &elpg_transitions_fops);
448 if (!d)
449 goto err_out;
450
451 d = debugfs_create_file(
452 "falc_trace", S_IRUGO, platform->debugfs, g,
453 &falc_trace_fops);
454 if (!d)
455 goto err_out;
456
457 d = debugfs_create_file(
458 "perfmon_events_enable", S_IRUGO, platform->debugfs, g,
459 &perfmon_events_enable_fops);
460 if (!d)
461 goto err_out;
462
463 d = debugfs_create_file(
464 "perfmon_events_count", S_IRUGO, platform->debugfs, g,
465 &perfmon_events_count_fops);
466 if (!d)
467 goto err_out;
468
469 d = debugfs_create_file(
470 "pmu_security", S_IRUGO, platform->debugfs, g,
471 &security_fops);
472 if (!d)
473 goto err_out;
474 return 0;
475err_out:
476 pr_err("%s: Failed to make debugfs node\n", __func__);
477 debugfs_remove_recursive(platform->debugfs);
478 return -ENOMEM;
479}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 00000000..c4e3243d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_PMU_H__
16#define __NVGPU_DEBUG_PMU_H__
17
18struct gk20a;
19int gk20a_pmu_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 00000000..40b93149
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_sched.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
/*
 * Dump scheduler control state: the control-locked flag, whether the
 * busy lock is currently held (probed non-blockingly via tryacquire),
 * and the active/recent TSG bitmaps under the status lock. Powers the
 * GPU on for the duration of the dump.
 */
static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
	bool sched_busy = true;

	/* Number of u64 words in each TSG bitmap. */
	int n = sched->bitmap_size / sizeof(u64);
	int i;
	int err;

	err = gk20a_busy(g);
	if (err)
		return err;

	/* If the busy lock can be taken, the scheduler is not busy. */
	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		sched_busy = false;
		nvgpu_mutex_release(&sched->busy_lock);
	}

	seq_printf(s, "control_locked=%d\n", sched->control_locked);
	seq_printf(s, "busy=%d\n", sched_busy);
	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);

	nvgpu_mutex_acquire(&sched->status_lock);

	seq_puts(s, "active_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);

	seq_puts(s, "recent_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);

	nvgpu_mutex_release(&sched->status_lock);

	gk20a_idle(g);

	return 0;
}
60
/* Bind gk20a_sched_debugfs_show to the seq_file; i_private is the gk20a
 * pointer. */
static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
}
65
/* Read-only seq_file operations for the "sched_ctrl" node. */
static const struct file_operations gk20a_sched_debugfs_fops = {
	.open = gk20a_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
72
73void gk20a_sched_debugfs_init(struct gk20a *g)
74{
75 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
76
77 debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
78 g, &gk20a_sched_debugfs_fops);
79}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
new file mode 100644
index 00000000..34a8f55f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
 15#ifndef __NVGPU_DEBUG_SCHED_H__
 16#define __NVGPU_DEBUG_SCHED_H__
 17
 18struct gk20a;
/* Register the scheduler-control debugfs node ("sched_ctrl") for @g. */
 19void gk20a_sched_debugfs_init(struct gk20a *g);
 20
 21#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 80e7698b..f85016d4 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -21,6 +21,7 @@
21#include <nvgpu/soc.h> 21#include <nvgpu/soc.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h> 23#include <nvgpu/enabled.h>
24#include <nvgpu/debug.h>
24 25
25#include "gk20a/gk20a_scale.h" 26#include "gk20a/gk20a_scale.h"
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
@@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g,
182 nvgpu_init_mm_vars(g); 183 nvgpu_init_mm_vars(g);
183 184
184 gk20a_create_sysfs(g->dev); 185 gk20a_create_sysfs(g->dev);
185 gk20a_debug_init(g->dev, debugfs_symlink); 186 gk20a_debug_init(g, debugfs_symlink);
186 187
187 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); 188 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
188 if (!g->dbg_regops_tmp_buf) { 189 if (!g->dbg_regops_tmp_buf) {
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 2502ff30..d81328f0 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -26,9 +26,9 @@
26#include <nvgpu/kmem.h> 26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/list.h> 28#include <nvgpu/list.h>
29#include <nvgpu/debug.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/debug_gk20a.h"
32#include "gk20a/ctxsw_trace_gk20a.h" 32#include "gk20a/ctxsw_trace_gk20a.h"
33#include "gk20a/dbg_gpu_gk20a.h" 33#include "gk20a/dbg_gpu_gk20a.h"
34#include "gk20a/fence_gk20a.h" 34#include "gk20a/fence_gk20a.h"
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
134 134
135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE 135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
136 136
137static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 137void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
138{ 138{
139 nvgpu_mutex_acquire(&tracker->lock); 139 nvgpu_mutex_acquire(&tracker->lock);
140} 140}
141 141
142static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 142void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
143{ 143{
144 nvgpu_mutex_release(&tracker->lock); 144 nvgpu_mutex_release(&tracker->lock);
145} 145}
146 146
147static void kmem_print_mem_alloc(struct gk20a *g, 147void kmem_print_mem_alloc(struct gk20a *g,
148 struct nvgpu_mem_alloc *alloc, 148 struct nvgpu_mem_alloc *alloc,
149 struct seq_file *s) 149 struct seq_file *s)
150{ 150{
151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES 151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
152 int i; 152 int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
231 alloc->stack_length = stack_trace.nr_entries; 231 alloc->stack_length = stack_trace.nr_entries;
232#endif 232#endif
233 233
234 lock_tracker(tracker); 234 nvgpu_lock_tracker(tracker);
235 tracker->bytes_alloced += size; 235 tracker->bytes_alloced += size;
236 tracker->bytes_alloced_real += real_size; 236 tracker->bytes_alloced_real += real_size;
237 tracker->nr_allocs++; 237 tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
246 if (ret) { 246 if (ret) {
247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr); 247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
248 kfree(alloc); 248 kfree(alloc);
249 unlock_tracker(tracker); 249 nvgpu_unlock_tracker(tracker);
250 return ret; 250 return ret;
251 } 251 }
252 unlock_tracker(tracker); 252 nvgpu_unlock_tracker(tracker);
253 253
254 return 0; 254 return 0;
255} 255}
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
259{ 259{
260 struct nvgpu_mem_alloc *alloc; 260 struct nvgpu_mem_alloc *alloc;
261 261
262 lock_tracker(tracker); 262 nvgpu_lock_tracker(tracker);
263 alloc = nvgpu_rem_alloc(tracker, addr); 263 alloc = nvgpu_rem_alloc(tracker, addr);
264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { 264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
265 unlock_tracker(tracker); 265 nvgpu_unlock_tracker(tracker);
266 return -EINVAL; 266 return -EINVAL;
267 } 267 }
268 268
269 tracker->nr_frees++; 269 tracker->nr_frees++;
270 tracker->bytes_freed += alloc->size; 270 tracker->bytes_freed += alloc->size;
271 tracker->bytes_freed_real += alloc->real_size; 271 tracker->bytes_freed_real += alloc->real_size;
272 unlock_tracker(tracker); 272 nvgpu_unlock_tracker(tracker);
273 273
274 return 0; 274 return 0;
275} 275}
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); 407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
408} 408}
409 409
410/**
411 * to_human_readable_bytes - Determine suffix for passed size.
412 *
413 * @bytes - Number of bytes to generate a suffix for.
414 * @hr_bytes [out] - The human readable number of bytes.
415 * @hr_suffix [out] - The suffix for the HR number of bytes.
416 *
417 * Computes a human readable decomposition of the passed number of bytes. The
418 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
419 * number of bytes is then passed back in @hr_bytes. This returns the following
420 * ranges:
421 *
422 * 0 - 1023 B
423 * 1 - 1023 KB
424 * 1 - 1023 MB
425 * 1 - 1023 GB
426 * 1 - 1023 TB
427 * 1 - ... PB
428 */
429static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
430 const char **hr_suffix)
431{
432 static const char *suffixes[] =
433 { "B", "KB", "MB", "GB", "TB", "PB" };
434
435 u64 suffix_ind = 0;
436
437 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
438 bytes >>= 10;
439 suffix_ind++;
440 }
441
442 /*
443 * Handle case where bytes > 1023PB.
444 */
445 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
446 suffix_ind : ARRAY_SIZE(suffixes) - 1;
447
448 *hr_bytes = bytes;
449 *hr_suffix = suffixes[suffix_ind];
450}
451
452/**
453 * print_hr_bytes - Print human readable bytes
454 *
455 * @s - A seq_file to print to. May be NULL.
456 * @msg - A message to print before the bytes.
457 * @bytes - Number of bytes.
458 *
459 * Print @msg followed by the human readable decomposition of the passed number
460 * of bytes.
461 *
462 * If @s is NULL then this prints will be made to the kernel log.
463 */
464static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
465{
466 u64 hr_bytes;
467 const char *hr_suffix;
468
469 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
470 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
471}
472
473/**
474 * print_histogram - Build a histogram of the memory usage.
475 *
476 * @tracker The tracking to pull data from.
477 * @s A seq_file to dump info into.
478 */
479static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
480 struct seq_file *s)
481{
482 int i;
483 u64 pot_min, pot_max;
484 u64 nr_buckets;
485 unsigned int *buckets;
486 unsigned int total_allocs;
487 struct nvgpu_rbtree_node *node;
488 static const char histogram_line[] =
489 "++++++++++++++++++++++++++++++++++++++++";
490
491 /*
492 * pot_min is essentially a round down to the nearest power of 2. This
493 * is the start of the histogram. pot_max is just a round up to the
494 * nearest power of two. Each histogram bucket is one power of two so
495 * the histogram buckets are exponential.
496 */
497 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
498 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
499
500 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
501
502 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
503 if (!buckets) {
504 __pstat(s, "OOM: could not allocate bucket storage!?\n");
505 return;
506 }
507
508 /*
509 * Iterate across all of the allocs and determine what bucket they
510 * should go in. Round the size down to the nearest power of two to
511 * find the right bucket.
512 */
513 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
514 while (node) {
515 int b;
516 u64 bucket_min;
517 struct nvgpu_mem_alloc *alloc =
518 nvgpu_mem_alloc_from_rbtree_node(node);
519
520 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
521 if (bucket_min < tracker->min_alloc)
522 bucket_min = tracker->min_alloc;
523
524 b = __ffs(bucket_min) - __ffs(pot_min);
525
526 /*
527 * Handle the one case were there's an alloc exactly as big as
528 * the maximum bucket size of the largest bucket. Most of the
529 * buckets have an inclusive minimum and exclusive maximum. But
530 * the largest bucket needs to have an _inclusive_ maximum as
531 * well.
532 */
533 if (b == (int)nr_buckets)
534 b--;
535
536 buckets[b]++;
537
538 nvgpu_rbtree_enum_next(&node, node);
539 }
540
541 total_allocs = 0;
542 for (i = 0; i < (int)nr_buckets; i++)
543 total_allocs += buckets[i];
544
545 __pstat(s, "Alloc histogram:\n");
546
547 /*
548 * Actually compute the histogram lines.
549 */
550 for (i = 0; i < (int)nr_buckets; i++) {
551 char this_line[sizeof(histogram_line) + 1];
552 u64 line_length;
553 u64 hr_bytes;
554 const char *hr_suffix;
555
556 memset(this_line, 0, sizeof(this_line));
557
558 /*
559 * Compute the normalized line length. Cant use floating point
560 * so we will just multiply everything by 1000 and use fixed
561 * point.
562 */
563 line_length = (1000 * buckets[i]) / total_allocs;
564 line_length *= sizeof(histogram_line);
565 line_length /= 1000;
566
567 memset(this_line, '+', line_length);
568
569 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
570 &hr_bytes, &hr_suffix);
571 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
572 hr_bytes, hr_bytes << 1,
573 hr_suffix, buckets[i], this_line);
574 }
575}
576
577#ifdef CONFIG_DEBUG_FS
578/**
579 * nvgpu_kmem_print_stats - Print kmem tracking stats.
580 *
581 * @tracker The tracking to pull data from.
582 * @s A seq_file to dump info into.
583 *
584 * Print stats from a tracker. If @s is non-null then seq_printf() will be
585 * used with @s. Otherwise the stats are pr_info()ed.
586 */
587void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
588 struct seq_file *s)
589{
590 lock_tracker(tracker);
591
592 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
593
594 __pstat(s, "Basic Stats:\n");
595 __pstat(s, " Number of allocs %lld\n",
596 tracker->nr_allocs);
597 __pstat(s, " Number of frees %lld\n",
598 tracker->nr_frees);
599 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
600 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
601 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
602 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
603 print_hr_bytes(s, " Bytes allocated (real) ",
604 tracker->bytes_alloced_real);
605 print_hr_bytes(s, " Bytes freed (real) ",
606 tracker->bytes_freed_real);
607 __pstat(s, "\n");
608
609 print_histogram(tracker, s);
610
611 unlock_tracker(tracker);
612}
613
614static int __kmem_tracking_show(struct seq_file *s, void *unused)
615{
616 struct nvgpu_mem_alloc_tracker *tracker = s->private;
617
618 nvgpu_kmem_print_stats(tracker, s);
619
620 return 0;
621}
622
623static int __kmem_tracking_open(struct inode *inode, struct file *file)
624{
625 return single_open(file, __kmem_tracking_show, inode->i_private);
626}
627
628static const struct file_operations __kmem_tracking_fops = {
629 .open = __kmem_tracking_open,
630 .read = seq_read,
631 .llseek = seq_lseek,
632 .release = single_release,
633};
634
635static int __kmem_traces_dump_tracker(struct gk20a *g,
636 struct nvgpu_mem_alloc_tracker *tracker,
637 struct seq_file *s)
638{
639 struct nvgpu_rbtree_node *node;
640
641 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
642 while (node) {
643 struct nvgpu_mem_alloc *alloc =
644 nvgpu_mem_alloc_from_rbtree_node(node);
645
646 kmem_print_mem_alloc(g, alloc, s);
647
648 nvgpu_rbtree_enum_next(&node, node);
649 }
650
651 return 0;
652}
653
654static int __kmem_traces_show(struct seq_file *s, void *unused)
655{
656 struct gk20a *g = s->private;
657
658 lock_tracker(g->vmallocs);
659 seq_puts(s, "Oustanding vmallocs:\n");
660 __kmem_traces_dump_tracker(g, g->vmallocs, s);
661 seq_puts(s, "\n");
662 unlock_tracker(g->vmallocs);
663
664 lock_tracker(g->kmallocs);
665 seq_puts(s, "Oustanding kmallocs:\n");
666 __kmem_traces_dump_tracker(g, g->kmallocs, s);
667 unlock_tracker(g->kmallocs);
668
669 return 0;
670}
671
672static int __kmem_traces_open(struct inode *inode, struct file *file)
673{
674 return single_open(file, __kmem_traces_show, inode->i_private);
675}
676
677static const struct file_operations __kmem_traces_fops = {
678 .open = __kmem_traces_open,
679 .read = seq_read,
680 .llseek = seq_lseek,
681 .release = single_release,
682};
683
684void nvgpu_kmem_debugfs_init(struct device *dev)
685{
686 struct gk20a_platform *plat = dev_get_drvdata(dev);
687 struct gk20a *g = get_gk20a(dev);
688 struct dentry *gpu_root = plat->debugfs;
689 struct dentry *node;
690
691 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
692 if (IS_ERR_OR_NULL(g->debugfs_kmem))
693 return;
694
695 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
696 g->debugfs_kmem,
697 g->vmallocs, &__kmem_tracking_fops);
698 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
699 g->debugfs_kmem,
700 g->kmallocs, &__kmem_tracking_fops);
701 node = debugfs_create_file("traces", S_IRUGO,
702 g->debugfs_kmem,
703 g, &__kmem_traces_fops);
704}
705#else
706void nvgpu_kmem_debugfs_init(struct device *dev)
707{
708}
709#endif
710
711static int __do_check_for_outstanding_allocs( 410static int __do_check_for_outstanding_allocs(
712 struct gk20a *g, 411 struct gk20a *g,
713 struct nvgpu_mem_alloc_tracker *tracker, 412 struct nvgpu_mem_alloc_tracker *tracker,
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
index d3abb378..a41762af 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -20,6 +20,8 @@
20#include <nvgpu/rbtree.h> 20#include <nvgpu/rbtree.h>
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22 22
23struct seq_file;
24
23#define __pstat(s, fmt, msg...) \ 25#define __pstat(s, fmt, msg...) \
24 do { \ 26 do { \
25 if (s) \ 27 if (s) \
@@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker {
92 unsigned long max_alloc; 94 unsigned long max_alloc;
93}; 95};
94 96
97void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
98void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
99
100void kmem_print_mem_alloc(struct gk20a *g,
101 struct nvgpu_mem_alloc *alloc,
102 struct seq_file *s);
95#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ 103#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
96 104
97#endif /* __KMEM_PRIV_H__ */ 105#endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index d5fc40de..4f7fc3fa 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -29,6 +29,7 @@
29#include <nvgpu/nvgpu_common.h> 29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/soc.h> 30#include <nvgpu/soc.h>
31#include <nvgpu/enabled.h> 31#include <nvgpu/enabled.h>
32#include <nvgpu/debug.h>
32 33
33#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
34#include "gk20a/platform_gk20a.h" 35#include "gk20a/platform_gk20a.h"
@@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
970 971
971 gk20a_user_deinit(dev, &nvgpu_class); 972 gk20a_user_deinit(dev, &nvgpu_class);
972 973
973#ifdef CONFIG_DEBUG_FS 974 gk20a_debug_deinit(g);
974 debugfs_remove_recursive(platform->debugfs);
975 debugfs_remove_recursive(platform->debugfs_alias);
976#endif
977 975
978 gk20a_remove_sysfs(dev); 976 gk20a_remove_sysfs(dev);
979 977
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 40ee199a..eae0475a 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
411 wmb(); 411 wmb();
412 a->inited = true; 412 a->inited = true;
413 413
414#ifdef CONFIG_DEBUG_FS
414 nvgpu_init_alloc_debug(g, __a); 415 nvgpu_init_alloc_debug(g, __a);
416#endif
415 alloc_dbg(__a, "New allocator: type bitmap\n"); 417 alloc_dbg(__a, "New allocator: type bitmap\n");
416 alloc_dbg(__a, " base 0x%llx\n", a->base); 418 alloc_dbg(__a, " base 0x%llx\n", a->base);
417 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); 419 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 34bc51df..0ef94c10 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
251 251
252 alloc_lock(__a); 252 alloc_lock(__a);
253 253
254#ifdef CONFIG_DEBUG_FS
254 nvgpu_fini_alloc_debug(__a); 255 nvgpu_fini_alloc_debug(__a);
256#endif
255 257
256 /* 258 /*
257 * Free the fixed allocs first. 259 * Free the fixed allocs first.
@@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1290 wmb(); 1292 wmb();
1291 a->initialized = 1; 1293 a->initialized = 1;
1292 1294
1295#ifdef CONFIG_DEBUG_FS
1293 nvgpu_init_alloc_debug(g, __a); 1296 nvgpu_init_alloc_debug(g, __a);
1297#endif
1294 alloc_dbg(__a, "New allocator: type buddy\n"); 1298 alloc_dbg(__a, "New allocator: type buddy\n");
1295 alloc_dbg(__a, " base 0x%llx\n", a->base); 1299 alloc_dbg(__a, " base 0x%llx\n", a->base);
1296 alloc_dbg(__a, " size 0x%llx\n", a->length); 1300 alloc_dbg(__a, " size 0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 234ae4a3..944b4b0f 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
99{ 99{
100 struct nvgpu_lockless_allocator *pa = a->priv; 100 struct nvgpu_lockless_allocator *pa = a->priv;
101 101
102#ifdef CONFIG_DEBUG_FS
102 nvgpu_fini_alloc_debug(a); 103 nvgpu_fini_alloc_debug(a);
104#endif
103 105
104 nvgpu_vfree(a->g, pa->next); 106 nvgpu_vfree(a->g, pa->next);
105 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa); 107 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
@@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
191 wmb(); 193 wmb();
192 a->inited = true; 194 a->inited = true;
193 195
196#ifdef CONFIG_DEBUG_FS
194 nvgpu_init_alloc_debug(g, __a); 197 nvgpu_init_alloc_debug(g, __a);
198#endif
195 alloc_dbg(__a, "New allocator: type lockless\n"); 199 alloc_dbg(__a, "New allocator: type lockless\n");
196 alloc_dbg(__a, " base 0x%llx\n", a->base); 200 alloc_dbg(__a, " base 0x%llx\n", a->base);
197 alloc_dbg(__a, " nodes %d\n", a->nr_nodes); 201 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index 211b353b..1646d2b1 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -20,11 +20,6 @@
20 20
21#include "gk20a/gk20a.h" 21#include "gk20a/gk20a.h"
22#include "gk20a/mm_gk20a.h" 22#include "gk20a/mm_gk20a.h"
23#ifdef CONFIG_DEBUG_FS
24#include "gk20a/platform_gk20a.h"
25#endif
26
27u32 nvgpu_alloc_tracing_on;
28 23
29u64 nvgpu_alloc_length(struct nvgpu_allocator *a) 24u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
30{ 25{
@@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
151 146
152 return 0; 147 return 0;
153} 148}
154
155#ifdef CONFIG_DEBUG_FS
156void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
157 struct seq_file *s, int lock)
158{
159 __a->ops->print_stats(__a, s, lock);
160}
161
162static int __alloc_show(struct seq_file *s, void *unused)
163{
164 struct nvgpu_allocator *a = s->private;
165
166 nvgpu_alloc_print_stats(a, s, 1);
167
168 return 0;
169}
170
171static int __alloc_open(struct inode *inode, struct file *file)
172{
173 return single_open(file, __alloc_show, inode->i_private);
174}
175
176static const struct file_operations __alloc_fops = {
177 .open = __alloc_open,
178 .read = seq_read,
179 .llseek = seq_lseek,
180 .release = single_release,
181};
182#endif
183
184void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
185{
186#ifdef CONFIG_DEBUG_FS
187 if (!g->debugfs_allocators)
188 return;
189
190 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
191 g->debugfs_allocators,
192 a, &__alloc_fops);
193#endif
194}
195
196void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
197{
198#ifdef CONFIG_DEBUG_FS
199 if (!IS_ERR_OR_NULL(a->debugfs_entry))
200 debugfs_remove(a->debugfs_entry);
201#endif
202}
203
204#ifdef CONFIG_DEBUG_FS
205void nvgpu_alloc_debugfs_init(struct device *dev)
206{
207 struct gk20a_platform *platform = dev_get_drvdata(dev);
208 struct dentry *gpu_root = platform->debugfs;
209 struct gk20a *g = get_gk20a(dev);
210
211 g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
212 if (IS_ERR_OR_NULL(g->debugfs_allocators))
213 return;
214
215 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
216 &nvgpu_alloc_tracing_on);
217}
218#endif
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 14b5da3c..3f4f3706 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
916 if (err) 916 if (err)
917 goto fail; 917 goto fail;
918 918
919#ifdef CONFIG_DEBUG_FS
919 nvgpu_init_alloc_debug(g, __a); 920 nvgpu_init_alloc_debug(g, __a);
921#endif
920 palloc_dbg(a, "New allocator: type page\n"); 922 palloc_dbg(a, "New allocator: type page\n");
921 palloc_dbg(a, " base 0x%llx\n", a->base); 923 palloc_dbg(a, " base 0x%llx\n", a->base);
922 palloc_dbg(a, " size 0x%llx\n", a->length); 924 palloc_dbg(a, " size 0x%llx\n", a->length);