summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu14
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug.c (renamed from drivers/gpu/nvgpu/gk20a/debug_gk20a.c)167
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.c80
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.c51
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.c30
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.h (renamed from drivers/gpu/nvgpu/gm20b/debug_gm20b.h)15
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.c369
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.h22
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.c31
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.h (renamed from drivers/gpu/nvgpu/gm20b/debug_gm20b.c)14
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.c315
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.h23
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.c26
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.c479
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.c79
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/driver_common.c3
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.c2
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem.c323
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem_priv.h8
-rw-r--r--drivers/gpu/nvgpu/common/linux/module.c6
-rw-r--r--drivers/gpu/nvgpu/common/mm/bitmap_allocator.c2
-rw-r--r--drivers/gpu/nvgpu/common/mm/buddy_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/lockless_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c70
-rw-r--r--drivers/gpu/nvgpu/common/mm/page_allocator.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c45
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/ce2_gk20a.c24
-rw-r--r--drivers/gpu/nvgpu/gk20a/ce2_gk20a.h6
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c414
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h6
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c22
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/hal_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c16
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/pmu_gk20a.c487
-rw-r--r--drivers/gpu/nvgpu/gk20a/pmu_gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gk20a/sched_gk20a.c67
-rw-r--r--drivers/gpu/nvgpu/gk20a/sched_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c1
-rw-r--r--drivers/gpu/nvgpu/gm20b/hal_gm20b.c4
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c1
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c1
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/allocator.h7
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/debug.h (renamed from drivers/gpu/nvgpu/gk20a/debug_gk20a.h)32
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h6
-rw-r--r--drivers/gpu/nvgpu/vgpu/vgpu.c4
56 files changed, 1851 insertions, 1568 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index e7ea3c5d..4b6a8e87 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -66,7 +66,6 @@ nvgpu-y := \
66 gk20a/fifo_gk20a.o \ 66 gk20a/fifo_gk20a.o \
67 gk20a/channel_gk20a.o \ 67 gk20a/channel_gk20a.o \
68 gk20a/channel_sync_gk20a.o \ 68 gk20a/channel_sync_gk20a.o \
69 gk20a/debug_gk20a.o \
70 gk20a/dbg_gpu_gk20a.o \ 69 gk20a/dbg_gpu_gk20a.o \
71 gk20a/regops_gk20a.o \ 70 gk20a/regops_gk20a.o \
72 gk20a/gr_gk20a.o \ 71 gk20a/gr_gk20a.o \
@@ -107,7 +106,6 @@ nvgpu-y := \
107 gm20b/mm_gm20b.o \ 106 gm20b/mm_gm20b.o \
108 gm20b/regops_gm20b.o \ 107 gm20b/regops_gm20b.o \
109 gm20b/mc_gm20b.o \ 108 gm20b/mc_gm20b.o \
110 gm20b/debug_gm20b.o \
111 gm20b/cde_gm20b.o \ 109 gm20b/cde_gm20b.o \
112 gm20b/therm_gm20b.o \ 110 gm20b/therm_gm20b.o \
113 gm206/bios_gm206.o \ 111 gm206/bios_gm206.o \
@@ -117,6 +115,18 @@ nvgpu-y := \
117 boardobj/boardobjgrp_e255.o \ 115 boardobj/boardobjgrp_e255.o \
118 boardobj/boardobjgrp_e32.o 116 boardobj/boardobjgrp_e32.o
119 117
118nvgpu-$(CONFIG_DEBUG_FS) += \
119 common/linux/debug.o \
120 common/linux/debug_gr.o \
121 common/linux/debug_fifo.o \
122 common/linux/debug_cde.o \
123 common/linux/debug_ce.o \
124 common/linux/debug_pmu.o \
125 common/linux/debug_sched.o \
126 common/linux/debug_mm.o \
127 common/linux/debug_allocator.o \
128 common/linux/debug_kmem.o
129
120nvgpu-$(CONFIG_TEGRA_GK20A) += tegra/linux/platform_gk20a_tegra.o 130nvgpu-$(CONFIG_TEGRA_GK20A) += tegra/linux/platform_gk20a_tegra.o
121nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o 131nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
122nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o 132nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/common/linux/debug.c
index ac435046..2962a467 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/common/linux/debug.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2011-2017 NVIDIA Corporation. All rights reserved. 2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 * 3 *
4 * This software is licensed under the terms of the GNU General Public 4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and 5 * License version 2, as published by the Free Software Foundation, and
@@ -12,26 +12,23 @@
12 * 12 *
13 */ 13 */
14 14
15#ifdef CONFIG_DEBUG_FS 15#include "debug_cde.h"
16#include "debug_ce.h"
17#include "debug_fifo.h"
18#include "debug_gr.h"
19#include "debug_mm.h"
20#include "debug_allocator.h"
21#include "debug_kmem.h"
22#include "debug_pmu.h"
23#include "debug_sched.h"
24
25#include "gk20a/gk20a.h"
26#include "gk20a/platform_gk20a.h"
27
16#include <linux/debugfs.h> 28#include <linux/debugfs.h>
17#endif
18#include <linux/seq_file.h> 29#include <linux/seq_file.h>
19#include <linux/io.h>
20#include <linux/fs.h>
21
22#include <nvgpu/log.h>
23#include <nvgpu/kmem.h>
24#include <nvgpu/semaphore.h>
25#include <nvgpu/log.h>
26
27#include "gk20a.h"
28#include "gk20a/platform_gk20a.h"
29#include "debug_gk20a.h"
30 30
31#include <nvgpu/hw/gk20a/hw_ram_gk20a.h> 31#include <nvgpu/debug.h>
32#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
33#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
35 32
36unsigned int gk20a_debug_trace_cmdbuf; 33unsigned int gk20a_debug_trace_cmdbuf;
37 34
@@ -59,81 +56,22 @@ void gk20a_debug_output(struct gk20a_debug_output *o,
59 o->fn(o->ctx, o->buf, len); 56 o->fn(o->ctx, o->buf, len);
60} 57}
61 58
62static void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, 59static int gk20a_gr_dump_regs(struct gk20a *g,
63 struct gk20a_debug_output *o)
64{
65 struct fifo_gk20a *f = &g->fifo;
66 u32 chid;
67 struct ch_state **ch_state;
68
69 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
70 if (!ch_state) {
71 gk20a_debug_output(o, "cannot alloc memory for channels\n");
72 return;
73 }
74
75 for (chid = 0; chid < f->num_channels; chid++) {
76 struct channel_gk20a *ch = &f->channel[chid];
77 if (gk20a_channel_get(ch)) {
78 ch_state[chid] =
79 nvgpu_kmalloc(g, sizeof(struct ch_state) +
80 ram_in_alloc_size_v());
81 /* ref taken stays to below loop with
82 * successful allocs */
83 if (!ch_state[chid])
84 gk20a_channel_put(ch);
85 }
86 }
87
88 for (chid = 0; chid < f->num_channels; chid++) {
89 struct channel_gk20a *ch = &f->channel[chid];
90 if (!ch_state[chid])
91 continue;
92
93 ch_state[chid]->pid = ch->pid;
94 ch_state[chid]->refs = atomic_read(&ch->ref_count);
95 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
96 &ch_state[chid]->inst_block[0],
97 ram_in_alloc_size_v());
98 gk20a_channel_put(ch);
99 }
100 for (chid = 0; chid < f->num_channels; chid++) {
101 if (ch_state[chid]) {
102 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
103 ch_state[chid]);
104 nvgpu_kfree(g, ch_state[chid]);
105 }
106 }
107 nvgpu_kfree(g, ch_state);
108}
109
110void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
111{
112 g->ops.fifo.dump_pbdma_status(g, o);
113 g->ops.fifo.dump_eng_status(g, o);
114
115 gk20a_debug_dump_all_channel_status_ramfc(g, o);
116}
117
118static int gk20a_gr_dump_regs(struct device *dev,
119 struct gk20a_debug_output *o) 60 struct gk20a_debug_output *o)
120{ 61{
121 struct gk20a_platform *platform = gk20a_get_platform(dev);
122 struct gk20a *g = platform->g;
123
124 if (g->ops.gr.dump_gr_regs) 62 if (g->ops.gr.dump_gr_regs)
125 gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); 63 gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
126 64
127 return 0; 65 return 0;
128} 66}
129 67
130int gk20a_gr_debug_dump(struct device *dev) 68int gk20a_gr_debug_dump(struct gk20a *g)
131{ 69{
132 struct gk20a_debug_output o = { 70 struct gk20a_debug_output o = {
133 .fn = gk20a_debug_write_printk 71 .fn = gk20a_debug_write_printk
134 }; 72 };
135 73
136 gk20a_gr_dump_regs(dev, &o); 74 gk20a_gr_dump_regs(g, &o);
137 75
138 return 0; 76 return 0;
139} 77}
@@ -154,23 +92,22 @@ static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
154 return -EINVAL; 92 return -EINVAL;
155 } 93 }
156 94
157 gk20a_gr_dump_regs(dev, &o); 95 gk20a_gr_dump_regs(g, &o);
158 96
159 gk20a_idle(g); 97 gk20a_idle(g);
160 98
161 return 0; 99 return 0;
162} 100}
163 101
164void gk20a_debug_dump(struct device *dev) 102void gk20a_debug_dump(struct gk20a *g)
165{ 103{
166 struct gk20a_platform *platform = gk20a_get_platform(dev); 104 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
167 struct gk20a *g = platform->g;
168 struct gk20a_debug_output o = { 105 struct gk20a_debug_output o = {
169 .fn = gk20a_debug_write_printk 106 .fn = gk20a_debug_write_printk
170 }; 107 };
171 108
172 if (platform->dump_platform_dependencies) 109 if (platform->dump_platform_dependencies)
173 platform->dump_platform_dependencies(dev); 110 platform->dump_platform_dependencies(g->dev);
174 111
175 /* HAL only initialized after 1st power-on */ 112 /* HAL only initialized after 1st power-on */
176 if (g->ops.debug.show_dump) 113 if (g->ops.debug.show_dump)
@@ -227,22 +164,28 @@ static const struct file_operations gk20a_debug_fops = {
227 .release = single_release, 164 .release = single_release,
228}; 165};
229 166
167void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
168{
169 g->ops.fifo.dump_pbdma_status(g, o);
170 g->ops.fifo.dump_eng_status(g, o);
171
172 gk20a_debug_dump_all_channel_status_ramfc(g, o);
173}
174
230void gk20a_init_debug_ops(struct gpu_ops *gops) 175void gk20a_init_debug_ops(struct gpu_ops *gops)
231{ 176{
232 gops->debug.show_dump = gk20a_debug_show_dump; 177 gops->debug.show_dump = gk20a_debug_show_dump;
233} 178}
234 179
235#ifdef CONFIG_DEBUG_FS
236static int railgate_residency_show(struct seq_file *s, void *data) 180static int railgate_residency_show(struct seq_file *s, void *data)
237{ 181{
238 struct device *dev = s->private; 182 struct gk20a *g = s->private;
239 struct gk20a_platform *platform = dev_get_drvdata(dev); 183 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
240 struct gk20a *g = get_gk20a(dev);
241 unsigned long time_since_last_state_transition_ms; 184 unsigned long time_since_last_state_transition_ms;
242 unsigned long total_rail_gate_time_ms; 185 unsigned long total_rail_gate_time_ms;
243 unsigned long total_rail_ungate_time_ms; 186 unsigned long total_rail_ungate_time_ms;
244 187
245 if (platform->is_railgated(dev)) { 188 if (platform->is_railgated(g->dev)) {
246 time_since_last_state_transition_ms = 189 time_since_last_state_transition_ms =
247 jiffies_to_msecs(jiffies - 190 jiffies_to_msecs(jiffies -
248 g->pstats.last_rail_gate_complete); 191 g->pstats.last_rail_gate_complete);
@@ -282,30 +225,27 @@ static const struct file_operations railgate_residency_fops = {
282 .release = single_release, 225 .release = single_release,
283}; 226};
284 227
285int gk20a_railgating_debugfs_init(struct device *dev) 228static int gk20a_railgating_debugfs_init(struct gk20a *g)
286{ 229{
230 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
287 struct dentry *d; 231 struct dentry *d;
288 struct gk20a_platform *platform = dev_get_drvdata(dev);
289 struct gk20a *g = get_gk20a(dev);
290 232
291 if (!g->can_railgate) 233 if (!g->can_railgate)
292 return 0; 234 return 0;
293 235
294 d = debugfs_create_file( 236 d = debugfs_create_file(
295 "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev, 237 "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
296 &railgate_residency_fops); 238 &railgate_residency_fops);
297 if (!d) 239 if (!d)
298 return -ENOMEM; 240 return -ENOMEM;
299 241
300 return 0; 242 return 0;
301} 243}
302#endif
303 244
304void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) 245void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
305{ 246{
306#ifdef CONFIG_DEBUG_FS 247 struct device *dev = g->dev;
307 struct gk20a_platform *platform = dev_get_drvdata(dev); 248 struct gk20a_platform *platform = dev_get_drvdata(dev);
308 struct gk20a *g = platform->g;
309 249
310 platform->debugfs = debugfs_create_dir(dev_name(dev), NULL); 250 platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
311 if (!platform->debugfs) 251 if (!platform->debugfs)
@@ -409,17 +349,28 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
409#endif 349#endif
410 350
411 gr_gk20a_debugfs_init(g); 351 gr_gk20a_debugfs_init(g);
412 gk20a_pmu_debugfs_init(g->dev); 352 gk20a_pmu_debugfs_init(g);
413 gk20a_railgating_debugfs_init(g->dev); 353 gk20a_railgating_debugfs_init(g);
414 gk20a_cde_debugfs_init(g->dev); 354 gk20a_cde_debugfs_init(g);
415 gk20a_ce_debugfs_init(g->dev); 355 gk20a_ce_debugfs_init(g);
416 nvgpu_alloc_debugfs_init(g->dev); 356 nvgpu_alloc_debugfs_init(g);
417 gk20a_mm_debugfs_init(g->dev); 357 gk20a_mm_debugfs_init(g);
418 gk20a_fifo_debugfs_init(g->dev); 358 gk20a_fifo_debugfs_init(g);
419 gk20a_sched_debugfs_init(g->dev); 359 gk20a_sched_debugfs_init(g);
420#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE 360#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
421 nvgpu_kmem_debugfs_init(g->dev); 361 nvgpu_kmem_debugfs_init(g);
422#endif
423#endif 362#endif
363}
364
365void gk20a_debug_deinit(struct gk20a *g)
366{
367 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
368
369 if (!platform->debugfs)
370 return;
371
372 gk20a_fifo_debugfs_deinit(g);
424 373
374 debugfs_remove_recursive(platform->debugfs);
375 debugfs_remove_recursive(platform->debugfs_alias);
425} 376}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
new file mode 100644
index 00000000..3d4a2bb2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_allocator.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/allocator.h>
22
23u32 nvgpu_alloc_tracing_on;
24
25void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
26 struct seq_file *s, int lock)
27{
28 __a->ops->print_stats(__a, s, lock);
29}
30
31static int __alloc_show(struct seq_file *s, void *unused)
32{
33 struct nvgpu_allocator *a = s->private;
34
35 nvgpu_alloc_print_stats(a, s, 1);
36
37 return 0;
38}
39
40static int __alloc_open(struct inode *inode, struct file *file)
41{
42 return single_open(file, __alloc_show, inode->i_private);
43}
44
45static const struct file_operations __alloc_fops = {
46 .open = __alloc_open,
47 .read = seq_read,
48 .llseek = seq_lseek,
49 .release = single_release,
50};
51
52void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
53{
54 if (!g->debugfs_allocators)
55 return;
56
57 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
58 g->debugfs_allocators,
59 a, &__alloc_fops);
60}
61
62void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
63{
64 if (!IS_ERR_OR_NULL(a->debugfs_entry))
65 debugfs_remove(a->debugfs_entry);
66}
67
68void nvgpu_alloc_debugfs_init(struct gk20a *g)
69{
70 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
71
72 g->debugfs_allocators = debugfs_create_dir("allocators", platform->debugfs);
73 if (IS_ERR_OR_NULL(g->debugfs_allocators)) {
74 g->debugfs_allocators = NULL;
75 return;
76 }
77
78 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
79 &nvgpu_alloc_tracing_on);
80}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
new file mode 100644
index 00000000..1b21cfc5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
16#define __NVGPU_DEBUG_ALLOCATOR_H__
17
18struct gk20a;
19void nvgpu_alloc_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
new file mode 100644
index 00000000..eb7c33e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20
21static ssize_t gk20a_cde_reload_write(struct file *file,
22 const char __user *userbuf, size_t count, loff_t *ppos)
23{
24 struct gk20a *g = file->private_data;
25 gk20a_cde_reload(g);
26 return count;
27}
28
29static const struct file_operations gk20a_cde_reload_fops = {
30 .open = simple_open,
31 .write = gk20a_cde_reload_write,
32};
33
34void gk20a_cde_debugfs_init(struct gk20a *g)
35{
36 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
37
38 if (!platform->has_cde)
39 return;
40
41 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
42 platform->debugfs, &g->cde_app.shader_parameter);
43 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
44 platform->debugfs, &g->cde_app.ctx_count);
45 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
46 platform->debugfs, &g->cde_app.ctx_usecount);
47 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
48 platform->debugfs, &g->cde_app.ctx_count_top);
49 debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
50 g, &gk20a_cde_reload_fops);
51}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
new file mode 100644
index 00000000..4895edd6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_CDE_H__
16#define __NVGPU_DEBUG_CDE_H__
17
18struct gk20a;
19void gk20a_cde_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
new file mode 100644
index 00000000..9c50870e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ce.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20void gk20a_ce_debugfs_init(struct gk20a *g)
21{
22 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
23
24 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
25 platform->debugfs, &g->ce_app.ctx_count);
26 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
27 platform->debugfs, &g->ce_app.app_state);
28 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
29 platform->debugfs, &g->ce_app.next_ctx_id);
30}
diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
index c3c5fed6..2a8750c4 100644
--- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h
@@ -1,7 +1,5 @@
1/* 1/*
2 * GM20B Debug functionality 2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * Copyright (C) 2015 NVIDIA CORPORATION. All rights reserved.
5 * 3 *
6 * This software is licensed under the terms of the GNU General Public 4 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and 5 * License version 2, as published by the Free Software Foundation, and
@@ -14,11 +12,10 @@
14 * 12 *
15 */ 13 */
16 14
17#ifndef _DEBUG_GM20B_H_ 15#ifndef __NVGPU_DEBUG_CE_H__
18#define _DEBUG_GM20B_H_ 16#define __NVGPU_DEBUG_CE_H__
19
20struct gpu_ops;
21 17
22void gm20b_init_debug_ops(struct gpu_ops *gops); 18struct gk20a;
19void gk20a_ce_debugfs_init(struct gk20a *g);
23 20
24#endif 21#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22
23void __gk20a_fifo_profile_free(struct kref *ref);
24
25static void *gk20a_fifo_sched_debugfs_seq_start(
26 struct seq_file *s, loff_t *pos)
27{
28 struct gk20a *g = s->private;
29 struct fifo_gk20a *f = &g->fifo;
30
31 if (*pos >= f->num_channels)
32 return NULL;
33
34 return &f->channel[*pos];
35}
36
37static void *gk20a_fifo_sched_debugfs_seq_next(
38 struct seq_file *s, void *v, loff_t *pos)
39{
40 struct gk20a *g = s->private;
41 struct fifo_gk20a *f = &g->fifo;
42
43 ++(*pos);
44 if (*pos >= f->num_channels)
45 return NULL;
46
47 return &f->channel[*pos];
48}
49
50static void gk20a_fifo_sched_debugfs_seq_stop(
51 struct seq_file *s, void *v)
52{
53}
54
55static int gk20a_fifo_sched_debugfs_seq_show(
56 struct seq_file *s, void *v)
57{
58 struct gk20a *g = s->private;
59 struct fifo_gk20a *f = &g->fifo;
60 struct channel_gk20a *ch = v;
61 struct tsg_gk20a *tsg = NULL;
62
63 struct fifo_engine_info_gk20a *engine_info;
64 struct fifo_runlist_info_gk20a *runlist;
65 u32 runlist_id;
66 int ret = SEQ_SKIP;
67 u32 engine_id;
68
69 engine_id = gk20a_fifo_get_gr_engine_id(g);
70 engine_info = (f->engine_info + engine_id);
71 runlist_id = engine_info->runlist_id;
72 runlist = &f->runlist_info[runlist_id];
73
74 if (ch == f->channel) {
75 seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
76 seq_puts(s, " (usecs) (msecs)\n");
77 ret = 0;
78 }
79
80 if (!test_bit(ch->hw_chid, runlist->active_channels))
81 return ret;
82
83 if (gk20a_channel_get(ch)) {
84 if (gk20a_is_channel_marked_as_tsg(ch))
85 tsg = &f->tsg[ch->tsgid];
86
87 seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
88 ch->hw_chid,
89 ch->tsgid,
90 ch->tgid,
91 tsg ? tsg->timeslice_us : ch->timeslice_us,
92 ch->timeout_ms_max,
93 tsg ? tsg->interleave_level : ch->interleave_level,
94 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
95 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
96 gk20a_channel_put(ch);
97 }
98 return 0;
99}
100
101static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
102 .start = gk20a_fifo_sched_debugfs_seq_start,
103 .next = gk20a_fifo_sched_debugfs_seq_next,
104 .stop = gk20a_fifo_sched_debugfs_seq_stop,
105 .show = gk20a_fifo_sched_debugfs_seq_show
106};
107
108static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
109 struct file *file)
110{
111 int err;
112
113 if (!capable(CAP_SYS_ADMIN))
114 return -EPERM;
115
116 err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
117 if (err)
118 return err;
119
120 gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
121
122 ((struct seq_file *)file->private_data)->private = inode->i_private;
123 return 0;
124};
125
126/*
127 * The file operations structure contains our open function along with
128 * set of the canned seq_ ops.
129 */
130static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
131 .owner = THIS_MODULE,
132 .open = gk20a_fifo_sched_debugfs_open,
133 .read = seq_read,
134 .llseek = seq_lseek,
135 .release = seq_release
136};
137
138static int gk20a_fifo_profile_enable(void *data, u64 val)
139{
140 struct gk20a *g = (struct gk20a *) data;
141 struct fifo_gk20a *f = &g->fifo;
142
143
144 nvgpu_mutex_acquire(&f->profile.lock);
145 if (val == 0) {
146 if (f->profile.enabled) {
147 f->profile.enabled = false;
148 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
149 }
150 } else {
151 if (!f->profile.enabled) {
152 /* not kref init as it can have a running condition if
153 * we enable/disable/enable while kickoff is happening
154 */
155 if (!kref_get_unless_zero(&f->profile.ref)) {
156 f->profile.data = vzalloc(
157 FIFO_PROFILING_ENTRIES *
158 sizeof(struct fifo_profile_gk20a));
159 f->profile.sorted = vzalloc(
160 FIFO_PROFILING_ENTRIES *
161 sizeof(u64));
162 if (!(f->profile.data && f->profile.sorted)) {
163 nvgpu_vfree(g, f->profile.data);
164 nvgpu_vfree(g, f->profile.sorted);
165 nvgpu_mutex_release(&f->profile.lock);
166 return -ENOMEM;
167 }
168 kref_init(&f->profile.ref);
169 }
170 atomic_set(&f->profile.get, 0);
171 f->profile.enabled = true;
172 }
173 }
174 nvgpu_mutex_release(&f->profile.lock);
175
176 return 0;
177}
178
179DEFINE_SIMPLE_ATTRIBUTE(
180 gk20a_fifo_profile_enable_debugfs_fops,
181 NULL,
182 gk20a_fifo_profile_enable,
183 "%llu\n"
184);
185
186static int __profile_cmp(const void *a, const void *b)
187{
188 return *((unsigned long long *) a) - *((unsigned long long *) b);
189}
190
191/*
192 * This uses about 800b in the stack, but the function using it is not part
193 * of a callstack where much memory is being used, so it is fine
194 */
195#define PERCENTILE_WIDTH 5
196#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
197
198static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
199 u64 *percentiles, u32 index_end, u32 index_start)
200{
201 unsigned int nelem = 0;
202 unsigned int index;
203 struct fifo_profile_gk20a *profile;
204
205 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
206 profile = &g->fifo.profile.data[index];
207
208 if (profile->timestamp[index_end] >
209 profile->timestamp[index_start]) {
210 /* This is a valid element */
211 g->fifo.profile.sorted[nelem] =
212 profile->timestamp[index_end] -
213 profile->timestamp[index_start];
214 nelem++;
215 }
216 }
217
218 /* sort it */
219 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
220 __profile_cmp, NULL);
221
222 /* build ranges */
223 for (index = 0; index < PERCENTILE_RANGES; index++)
224 percentiles[index] =
225 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
226 nelem)/100 - 1];
227 return nelem;
228}
229
230static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
231{
232 struct gk20a *g = s->private;
233 unsigned int get, nelem, index;
234 /*
235 * 800B in the stack, but function is declared statically and only
236 * called from debugfs handler
237 */
238 u64 percentiles_ioctl[PERCENTILE_RANGES];
239 u64 percentiles_kickoff[PERCENTILE_RANGES];
240 u64 percentiles_jobtracking[PERCENTILE_RANGES];
241 u64 percentiles_append[PERCENTILE_RANGES];
242 u64 percentiles_userd[PERCENTILE_RANGES];
243
244 if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
245 seq_printf(s, "Profiling disabled\n");
246 return 0;
247 }
248
249 get = atomic_read(&g->fifo.profile.get);
250
251 __gk20a_fifo_create_stats(g, percentiles_ioctl,
252 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
253 __gk20a_fifo_create_stats(g, percentiles_kickoff,
254 PROFILE_END, PROFILE_ENTRY);
255 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
256 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
257 __gk20a_fifo_create_stats(g, percentiles_append,
258 PROFILE_APPEND, PROFILE_JOB_TRACKING);
259 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
260 PROFILE_END, PROFILE_APPEND);
261
262 seq_printf(s, "Number of kickoffs: %d\n", nelem);
263 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
264
265 for (index = 0; index < PERCENTILE_RANGES; index++)
266 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
267 PERCENTILE_WIDTH * (index+1),
268 percentiles_ioctl[index],
269 percentiles_kickoff[index],
270 percentiles_append[index],
271 percentiles_jobtracking[index],
272 percentiles_userd[index]);
273
274 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
275
276 return 0;
277}
278
279static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
280{
281 return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
282}
283
284static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
285 .open = gk20a_fifo_profile_stats_open,
286 .read = seq_read,
287 .llseek = seq_lseek,
288 .release = single_release,
289};
290
291
292void gk20a_fifo_debugfs_init(struct gk20a *g)
293{
294 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
295
296 struct dentry *gpu_root = platform->debugfs;
297 struct dentry *fifo_root;
298 struct dentry *profile_root;
299
300 fifo_root = debugfs_create_dir("fifo", gpu_root);
301 if (IS_ERR_OR_NULL(fifo_root))
302 return;
303
304 gk20a_dbg(gpu_dbg_info, "g=%p", g);
305
306 debugfs_create_file("sched", 0600, fifo_root, g,
307 &gk20a_fifo_sched_debugfs_fops);
308
309 profile_root = debugfs_create_dir("profile", fifo_root);
310 if (IS_ERR_OR_NULL(profile_root))
311 return;
312
313 nvgpu_mutex_init(&g->fifo.profile.lock);
314 g->fifo.profile.enabled = false;
315 atomic_set(&g->fifo.profile.get, 0);
316 atomic_set(&g->fifo.profile.ref.refcount, 0);
317
318 debugfs_create_file("enable", 0600, profile_root, g,
319 &gk20a_fifo_profile_enable_debugfs_fops);
320
321 debugfs_create_file("stats", 0600, profile_root, g,
322 &gk20a_fifo_profile_stats_debugfs_fops);
323
324}
325
326void __gk20a_fifo_profile_free(struct kref *ref)
327{
328 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
329 profile.ref);
330 nvgpu_vfree(f->g, f->profile.data);
331 nvgpu_vfree(f->g, f->profile.sorted);
332}
333
334/* Get the next element in the ring buffer of profile entries
335 * and grab a reference to the structure
336 */
337struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
338{
339 struct fifo_gk20a *f = &g->fifo;
340 struct fifo_profile_gk20a *profile;
341 unsigned int index;
342
343 /* If kref is zero, profiling is not enabled */
344 if (!kref_get_unless_zero(&f->profile.ref))
345 return NULL;
346 index = atomic_inc_return(&f->profile.get);
347 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
348
349 return profile;
350}
351
352/* Free the reference to the structure. This allows deferred cleanups */
353void gk20a_fifo_profile_release(struct gk20a *g,
354 struct fifo_profile_gk20a *profile)
355{
356 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
357}
358
359void gk20a_fifo_debugfs_deinit(struct gk20a *g)
360{
361 struct fifo_gk20a *f = &g->fifo;
362
363 nvgpu_mutex_acquire(&f->profile.lock);
364 if (f->profile.enabled) {
365 f->profile.enabled = false;
366 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
367 }
368 nvgpu_mutex_release(&f->profile.lock);
369}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
new file mode 100644
index 00000000..46ac853e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_FIFO_H__
16#define __NVGPU_DEBUG_FIFO_H__
17
18struct gk20a;
19void gk20a_fifo_debugfs_init(struct gk20a *g);
20void gk20a_fifo_debugfs_deinit(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
new file mode 100644
index 00000000..56b8612e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_gr.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
/*
 * Create the GR debugfs node exposing the default attribute circular
 * buffer size as a writable u32. Always returns 0.
 */
int gr_gk20a_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	/* The dentry is kept so the node could be removed/inspected later. */
	g->debugfs_gr_default_attrib_cb_size =
		debugfs_create_u32("gr_default_attrib_cb_size",
				   S_IRUGO|S_IWUSR, platform->debugfs,
				   &g->gr.attrib_cb_default_size);

	return 0;
}
31
diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c b/drivers/gpu/nvgpu/common/linux/debug_gr.h
index b266200c..4b46acbb 100644
--- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2015 NVIDIA Corporation. All rights reserved. 2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 * 3 *
4 * This software is licensed under the terms of the GNU General Public 4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and 5 * License version 2, as published by the Free Software Foundation, and
@@ -12,10 +12,10 @@
12 * 12 *
13 */ 13 */
14 14
15#include "gk20a/gk20a.h" 15#ifndef __NVGPU_DEBUG_GR_H__
16#include "debug_gm20b.h" 16#define __NVGPU_DEBUG_GR_H__
17 17
18void gm20b_init_debug_ops(struct gpu_ops *gops) 18struct gk20a;
19{ 19int gr_gk20a_debugfs_init(struct gk20a *g);
20 gops->debug.show_dump = gk20a_debug_show_dump; 20
21} 21#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_kmem.h"
16#include "kmem_priv.h"
17#include "gk20a/platform_gk20a.h"
18
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21
22#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
23/**
24 * to_human_readable_bytes - Determine suffix for passed size.
25 *
26 * @bytes - Number of bytes to generate a suffix for.
27 * @hr_bytes [out] - The human readable number of bytes.
28 * @hr_suffix [out] - The suffix for the HR number of bytes.
29 *
30 * Computes a human readable decomposition of the passed number of bytes. The
31 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
32 * number of bytes is then passed back in @hr_bytes. This returns the following
33 * ranges:
34 *
35 * 0 - 1023 B
36 * 1 - 1023 KB
37 * 1 - 1023 MB
38 * 1 - 1023 GB
39 * 1 - 1023 TB
40 * 1 - ... PB
41 */
42static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
43 const char **hr_suffix)
44{
45 static const char *suffixes[] =
46 { "B", "KB", "MB", "GB", "TB", "PB" };
47
48 u64 suffix_ind = 0;
49
50 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
51 bytes >>= 10;
52 suffix_ind++;
53 }
54
55 /*
56 * Handle case where bytes > 1023PB.
57 */
58 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
59 suffix_ind : ARRAY_SIZE(suffixes) - 1;
60
61 *hr_bytes = bytes;
62 *hr_suffix = suffixes[suffix_ind];
63}
64
65/**
66 * print_hr_bytes - Print human readable bytes
67 *
68 * @s - A seq_file to print to. May be NULL.
69 * @msg - A message to print before the bytes.
70 * @bytes - Number of bytes.
71 *
72 * Print @msg followed by the human readable decomposition of the passed number
73 * of bytes.
74 *
75 * If @s is NULL then this prints will be made to the kernel log.
76 */
77static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
78{
79 u64 hr_bytes;
80 const char *hr_suffix;
81
82 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
83 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
84}
85
86/**
87 * print_histogram - Build a histogram of the memory usage.
88 *
89 * @tracker The tracking to pull data from.
90 * @s A seq_file to dump info into.
91 */
92static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
93 struct seq_file *s)
94{
95 int i;
96 u64 pot_min, pot_max;
97 u64 nr_buckets;
98 unsigned int *buckets;
99 unsigned int total_allocs;
100 struct nvgpu_rbtree_node *node;
101 static const char histogram_line[] =
102 "++++++++++++++++++++++++++++++++++++++++";
103
104 /*
105 * pot_min is essentially a round down to the nearest power of 2. This
106 * is the start of the histogram. pot_max is just a round up to the
107 * nearest power of two. Each histogram bucket is one power of two so
108 * the histogram buckets are exponential.
109 */
110 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
111 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
112
113 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
114
115 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
116 if (!buckets) {
117 __pstat(s, "OOM: could not allocate bucket storage!?\n");
118 return;
119 }
120
121 /*
122 * Iterate across all of the allocs and determine what bucket they
123 * should go in. Round the size down to the nearest power of two to
124 * find the right bucket.
125 */
126 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
127 while (node) {
128 int b;
129 u64 bucket_min;
130 struct nvgpu_mem_alloc *alloc =
131 nvgpu_mem_alloc_from_rbtree_node(node);
132
133 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
134 if (bucket_min < tracker->min_alloc)
135 bucket_min = tracker->min_alloc;
136
137 b = __ffs(bucket_min) - __ffs(pot_min);
138
139 /*
140 * Handle the one case were there's an alloc exactly as big as
141 * the maximum bucket size of the largest bucket. Most of the
142 * buckets have an inclusive minimum and exclusive maximum. But
143 * the largest bucket needs to have an _inclusive_ maximum as
144 * well.
145 */
146 if (b == (int)nr_buckets)
147 b--;
148
149 buckets[b]++;
150
151 nvgpu_rbtree_enum_next(&node, node);
152 }
153
154 total_allocs = 0;
155 for (i = 0; i < (int)nr_buckets; i++)
156 total_allocs += buckets[i];
157
158 __pstat(s, "Alloc histogram:\n");
159
160 /*
161 * Actually compute the histogram lines.
162 */
163 for (i = 0; i < (int)nr_buckets; i++) {
164 char this_line[sizeof(histogram_line) + 1];
165 u64 line_length;
166 u64 hr_bytes;
167 const char *hr_suffix;
168
169 memset(this_line, 0, sizeof(this_line));
170
171 /*
172 * Compute the normalized line length. Cant use floating point
173 * so we will just multiply everything by 1000 and use fixed
174 * point.
175 */
176 line_length = (1000 * buckets[i]) / total_allocs;
177 line_length *= sizeof(histogram_line);
178 line_length /= 1000;
179
180 memset(this_line, '+', line_length);
181
182 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
183 &hr_bytes, &hr_suffix);
184 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
185 hr_bytes, hr_bytes << 1,
186 hr_suffix, buckets[i], this_line);
187 }
188}
189
/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s       A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed.
 *
 * Holds the tracker lock for the duration of the dump so the counters and
 * the histogram are mutually consistent.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	/* "real" figures include allocator overhead, not just requested size */
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}
225
226static int __kmem_tracking_show(struct seq_file *s, void *unused)
227{
228 struct nvgpu_mem_alloc_tracker *tracker = s->private;
229
230 nvgpu_kmem_print_stats(tracker, s);
231
232 return 0;
233}
234
235static int __kmem_tracking_open(struct inode *inode, struct file *file)
236{
237 return single_open(file, __kmem_tracking_show, inode->i_private);
238}
239
240static const struct file_operations __kmem_tracking_fops = {
241 .open = __kmem_tracking_open,
242 .read = seq_read,
243 .llseek = seq_lseek,
244 .release = single_release,
245};
246
247static int __kmem_traces_dump_tracker(struct gk20a *g,
248 struct nvgpu_mem_alloc_tracker *tracker,
249 struct seq_file *s)
250{
251 struct nvgpu_rbtree_node *node;
252
253 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
254 while (node) {
255 struct nvgpu_mem_alloc *alloc =
256 nvgpu_mem_alloc_from_rbtree_node(node);
257
258 kmem_print_mem_alloc(g, alloc, s);
259
260 nvgpu_rbtree_enum_next(&node, node);
261 }
262
263 return 0;
264}
265
266static int __kmem_traces_show(struct seq_file *s, void *unused)
267{
268 struct gk20a *g = s->private;
269
270 nvgpu_lock_tracker(g->vmallocs);
271 seq_puts(s, "Oustanding vmallocs:\n");
272 __kmem_traces_dump_tracker(g, g->vmallocs, s);
273 seq_puts(s, "\n");
274 nvgpu_unlock_tracker(g->vmallocs);
275
276 nvgpu_lock_tracker(g->kmallocs);
277 seq_puts(s, "Oustanding kmallocs:\n");
278 __kmem_traces_dump_tracker(g, g->kmallocs, s);
279 nvgpu_unlock_tracker(g->kmallocs);
280
281 return 0;
282}
283
284static int __kmem_traces_open(struct inode *inode, struct file *file)
285{
286 return single_open(file, __kmem_traces_show, inode->i_private);
287}
288
289static const struct file_operations __kmem_traces_fops = {
290 .open = __kmem_traces_open,
291 .read = seq_read,
292 .llseek = seq_lseek,
293 .release = single_release,
294};
295
296void nvgpu_kmem_debugfs_init(struct gk20a *g)
297{
298 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
299 struct dentry *node;
300
301 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
302 if (IS_ERR_OR_NULL(g->debugfs_kmem))
303 return;
304
305 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
306 g->debugfs_kmem,
307 g->vmallocs, &__kmem_tracking_fops);
308 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
309 g->debugfs_kmem,
310 g->kmallocs, &__kmem_tracking_fops);
311 node = debugfs_create_file("traces", S_IRUGO,
312 g->debugfs_kmem,
313 g, &__kmem_traces_fops);
314}
315#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_KMEM_H__
16#define __NVGPU_DEBUG_KMEM_H__
17
18struct gk20a;
19#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
20void nvgpu_kmem_debugfs_init(struct gk20a *g);
21#endif
22
23#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 00000000..1e260f89
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_mm.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
/*
 * Expose the MM "force_pramin" flag as a writable debugfs bool so PRAMIN
 * access can be forced from user space for debugging.
 */
void gk20a_mm_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	debugfs_create_bool("force_pramin", 0664, platform->debugfs,
			    &g->mm.force_pramin);
}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 00000000..bf7bc985
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_MM_H__
16#define __NVGPU_DEBUG_MM_H__
17
18struct gk20a;
19void gk20a_mm_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 00000000..f19f5139
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_pmu.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20#include <linux/uaccess.h>
21
/*
 * seq_file show callback for "lpwr_debug": dump the low-power state of the
 * GPU. When the PG feature list reports anything beyond plain GR power
 * gating, the full pstate/RPPG/MSCG state is printed; otherwise only the
 * basic ELPG counters are shown.
 */
static int lpwr_debug_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	if (g->ops.pmu.pmu_pg_engines_feature_list &&
		g->ops.pmu.pmu_pg_engines_feature_list(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
		PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
		seq_printf(s, "PSTATE: %u\n"
			"RPPG Enabled: %u\n"
			"RPPG ref count: %u\n"
			"RPPG state: %u\n"
			"MSCG Enabled: %u\n"
			"MSCG pstate state: %u\n"
			"MSCG transition state: %u\n",
			g->ops.clk_arb.get_current_pstate(g),
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat, g->mscg_enabled,
			g->pmu.mscg_stat, g->pmu.mscg_transition_state);

	} else
		seq_printf(s, "ELPG Enabled: %u\n"
			"ELPG ref count: %u\n"
			"ELPG state: %u\n",
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat);

	return 0;

}
52
53static int lpwr_debug_open(struct inode *inode, struct file *file)
54{
55 return single_open(file, lpwr_debug_show, inode->i_private);
56}
57
58static const struct file_operations lpwr_debug_fops = {
59 .open = lpwr_debug_open,
60 .read = seq_read,
61 .llseek = seq_lseek,
62 .release = single_release,
63};
64
/*
 * seq_file show callback for "mscg_residency": report time spent in/out of
 * MSCG plus a residency ratio and entry/exit latency statistics.
 */
static int mscg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		gk20a_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
		gk20a_idle(g);
	}
	/*
	 * Fold the live PMU counters into the totals accumulated across
	 * earlier power cycles. If the GPU was off, pg_stat_data stays zero
	 * and only the accumulated totals are reported.
	 */
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;

	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in MSCG: %llu us\n"
		"Time out of MSCG: %llu us\n"
		"MSCG residency ratio: %llu\n"
		"MSCG Entry Count: %u\n"
		"MSCG Avg Entry latency %u\n"
		"MSCG Avg Exit latency %u\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;

}
111
112static int mscg_stat_open(struct inode *inode, struct file *file)
113{
114 return single_open(file, mscg_stat_show, inode->i_private);
115}
116
117static const struct file_operations mscg_stat_fops = {
118 .open = mscg_stat_open,
119 .read = seq_read,
120 .llseek = seq_lseek,
121 .release = single_release,
122};
123
124static int mscg_transitions_show(struct seq_file *s, void *data)
125{
126 struct gk20a *g = s->private;
127 struct pmu_pg_stats_data pg_stat_data = { 0 };
128 u32 total_gating_cnt;
129 int err;
130
131 if (g->power_on) {
132 err = gk20a_busy(g);
133 if (err)
134 return err;
135
136 gk20a_pmu_get_pg_stats(g,
137 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
138 gk20a_idle(g);
139 }
140 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
141
142 seq_printf(s, "%u\n", total_gating_cnt);
143 return 0;
144
145}
146
147static int mscg_transitions_open(struct inode *inode, struct file *file)
148{
149 return single_open(file, mscg_transitions_show, inode->i_private);
150}
151
152static const struct file_operations mscg_transitions_fops = {
153 .open = mscg_transitions_open,
154 .read = seq_read,
155 .llseek = seq_lseek,
156 .release = single_release,
157};
158
159static int elpg_stat_show(struct seq_file *s, void *data)
160{
161 struct gk20a *g = s->private;
162 struct pmu_pg_stats_data pg_stat_data = { 0 };
163 u64 total_ingating, total_ungating, residency, divisor, dividend;
164 int err;
165
166 /* Don't unnecessarily power on the device */
167 if (g->power_on) {
168 err = gk20a_busy(g);
169 if (err)
170 return err;
171
172 gk20a_pmu_get_pg_stats(g,
173 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
174 gk20a_idle(g);
175 }
176 total_ingating = g->pg_ingating_time_us +
177 (u64)pg_stat_data.ingating_time;
178 total_ungating = g->pg_ungating_time_us +
179 (u64)pg_stat_data.ungating_time;
180 divisor = total_ingating + total_ungating;
181
182 /* We compute the residency on a scale of 1000 */
183 dividend = total_ingating * 1000;
184
185 if (divisor)
186 residency = div64_u64(dividend, divisor);
187 else
188 residency = 0;
189
190 seq_printf(s,
191 "Time in ELPG: %llu us\n"
192 "Time out of ELPG: %llu us\n"
193 "ELPG residency ratio: %llu\n"
194 "ELPG Entry Count: %u\n"
195 "ELPG Avg Entry latency %u us\n"
196 "ELPG Avg Exit latency %u us\n",
197 total_ingating, total_ungating,
198 residency, pg_stat_data.gating_cnt,
199 pg_stat_data.avg_entry_latency_us,
200 pg_stat_data.avg_exit_latency_us);
201 return 0;
202
203}
204
205static int elpg_stat_open(struct inode *inode, struct file *file)
206{
207 return single_open(file, elpg_stat_show, inode->i_private);
208}
209
210static const struct file_operations elpg_stat_fops = {
211 .open = elpg_stat_open,
212 .read = seq_read,
213 .llseek = seq_lseek,
214 .release = single_release,
215};
216
217static int elpg_transitions_show(struct seq_file *s, void *data)
218{
219 struct gk20a *g = s->private;
220 struct pmu_pg_stats_data pg_stat_data = { 0 };
221 u32 total_gating_cnt;
222 int err;
223
224 if (g->power_on) {
225 err = gk20a_busy(g);
226 if (err)
227 return err;
228
229 gk20a_pmu_get_pg_stats(g,
230 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
231 gk20a_idle(g);
232 }
233 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
234
235 seq_printf(s, "%u\n", total_gating_cnt);
236 return 0;
237
238}
239
240static int elpg_transitions_open(struct inode *inode, struct file *file)
241{
242 return single_open(file, elpg_transitions_show, inode->i_private);
243}
244
245static const struct file_operations elpg_transitions_fops = {
246 .open = elpg_transitions_open,
247 .read = seq_read,
248 .llseek = seq_lseek,
249 .release = single_release,
250};
251
/*
 * seq_file show callback for "falc_trace": copy the PMU falcon trace buffer
 * into system memory and pretty-print it. Each 0x40-byte record starts with
 * an index word followed by a format string at offset 20; embedded "0x"
 * placeholders in the string are substituted with the argument words that
 * follow the index.
 */
static int falc_trace_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_gk20a *pmu = &g->pmu;
	u32 i = 0, j = 0, k, l, m;
	char part_str[40];
	void *tracebuffer;
	char *trace;
	u32 *trace1;

	/* allocate system memory to copy pmu trace buffer */
	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
	if (tracebuffer == NULL)
		return -ENOMEM;

	/* read pmu traces into system memory buffer */
	nvgpu_mem_rd_n(g, &pmu->trace_buf,
		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);

	trace = (char *)tracebuffer;
	trace1 = (u32 *)tracebuffer;

	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
		/*
		 * NOTE(review): this scans 0x40 *words* (256 bytes) although
		 * the stride is 0x40 *bytes*; near the end of the buffer
		 * trace1[(i / 4) + j] can read past the allocation. Presumably
		 * the intent is "stop at the first all-zero region" — confirm
		 * against the PMU trace format before changing.
		 */
		for (j = 0; j < 0x40; j++)
			if (trace1[(i / 4) + j])
				break;
		if (j == 0x40)
			break;
		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
		l = 0;
		m = 0;
		/* substitute each hex placeholder with the next arg word */
		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
			if (k >= 40)
				break;
			strncpy(part_str, (trace+i+20+m), k);
			part_str[k] = 0;
			seq_printf(s, "%s0x%x", part_str,
					trace1[(i / 4) + 1 + l]);
			l++;
			m += k + 2;
		}
		seq_printf(s, "%s", (trace+i+20+m));
	}

	nvgpu_kfree(g, tracebuffer);
	return 0;
}
299
300static int falc_trace_open(struct inode *inode, struct file *file)
301{
302 return single_open(file, falc_trace_show, inode->i_private);
303}
304
305static const struct file_operations falc_trace_fops = {
306 .open = falc_trace_open,
307 .read = seq_read,
308 .llseek = seq_lseek,
309 .release = single_release,
310};
311
312static int perfmon_events_enable_show(struct seq_file *s, void *data)
313{
314 struct gk20a *g = s->private;
315
316 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
317 return 0;
318
319}
320
321static int perfmon_events_enable_open(struct inode *inode, struct file *file)
322{
323 return single_open(file, perfmon_events_enable_show, inode->i_private);
324}
325
/*
 * debugfs write handler for "perfmon_events_enable": parse a decimal 0/1
 * from user space and start/stop PMU perfmon sampling accordingly. When the
 * GPU is powered off only the flag is recorded; sampling state is applied
 * on the next power-up.
 */
static ssize_t perfmon_events_enable_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;
	int err;

	memset(buf, 0, sizeof(buf));
	/* clamp to buf size minus one so the string stays NUL-terminated */
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		/* only toggle sampling when the requested state differs */
		if (val && !g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = true;
			nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = false;
			nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}
365
366static const struct file_operations perfmon_events_enable_fops = {
367 .open = perfmon_events_enable_open,
368 .read = seq_read,
369 .write = perfmon_events_enable_write,
370 .llseek = seq_lseek,
371 .release = single_release,
372};
373
374static int perfmon_events_count_show(struct seq_file *s, void *data)
375{
376 struct gk20a *g = s->private;
377
378 seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
379 return 0;
380
381}
382
383static int perfmon_events_count_open(struct inode *inode, struct file *file)
384{
385 return single_open(file, perfmon_events_count_show, inode->i_private);
386}
387
388static const struct file_operations perfmon_events_count_fops = {
389 .open = perfmon_events_count_open,
390 .read = seq_read,
391 .llseek = seq_lseek,
392 .release = single_release,
393};
394
395static int security_show(struct seq_file *s, void *data)
396{
397 struct gk20a *g = s->private;
398
399 seq_printf(s, "%d\n", g->pmu.pmu_mode);
400 return 0;
401
402}
403
404static int security_open(struct inode *inode, struct file *file)
405{
406 return single_open(file, security_show, inode->i_private);
407}
408
409static const struct file_operations security_fops = {
410 .open = security_open,
411 .read = seq_read,
412 .llseek = seq_lseek,
413 .release = single_release,
414};
415
416int gk20a_pmu_debugfs_init(struct gk20a *g)
417{
418 struct dentry *d;
419 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
420
421 d = debugfs_create_file(
422 "lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
423 &lpwr_debug_fops);
424 if (!d)
425 goto err_out;
426
427 d = debugfs_create_file(
428 "mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
429 &mscg_stat_fops);
430 if (!d)
431 goto err_out;
432
433 d = debugfs_create_file(
434 "mscg_transitions", S_IRUGO, platform->debugfs, g,
435 &mscg_transitions_fops);
436 if (!d)
437 goto err_out;
438
439 d = debugfs_create_file(
440 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
441 &elpg_stat_fops);
442 if (!d)
443 goto err_out;
444
445 d = debugfs_create_file(
446 "elpg_transitions", S_IRUGO, platform->debugfs, g,
447 &elpg_transitions_fops);
448 if (!d)
449 goto err_out;
450
451 d = debugfs_create_file(
452 "falc_trace", S_IRUGO, platform->debugfs, g,
453 &falc_trace_fops);
454 if (!d)
455 goto err_out;
456
457 d = debugfs_create_file(
458 "perfmon_events_enable", S_IRUGO, platform->debugfs, g,
459 &perfmon_events_enable_fops);
460 if (!d)
461 goto err_out;
462
463 d = debugfs_create_file(
464 "perfmon_events_count", S_IRUGO, platform->debugfs, g,
465 &perfmon_events_count_fops);
466 if (!d)
467 goto err_out;
468
469 d = debugfs_create_file(
470 "pmu_security", S_IRUGO, platform->debugfs, g,
471 &security_fops);
472 if (!d)
473 goto err_out;
474 return 0;
475err_out:
476 pr_err("%s: Failed to make debugfs node\n", __func__);
477 debugfs_remove_recursive(platform->debugfs);
478 return -ENOMEM;
479}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 00000000..c4e3243d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_PMU_H__
16#define __NVGPU_DEBUG_PMU_H__
17
18struct gk20a;
19int gk20a_pmu_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 00000000..40b93149
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_sched.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
/*
 * seq_file show callback for "sched_ctrl": dump scheduler control state and
 * the active/recent TSG bitmaps. Powers the GPU up for the duration of the
 * dump.
 */
static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
	bool sched_busy = true;

	int n = sched->bitmap_size / sizeof(u64);
	int i;
	int err;

	err = gk20a_busy(g);
	if (err)
		return err;

	/*
	 * Probe busy state without blocking: if we can take busy_lock it was
	 * free, so the scheduler is not busy.
	 */
	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		sched_busy = false;
		nvgpu_mutex_release(&sched->busy_lock);
	}

	seq_printf(s, "control_locked=%d\n", sched->control_locked);
	seq_printf(s, "busy=%d\n", sched_busy);
	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);

	/* status_lock keeps the two bitmaps mutually consistent */
	nvgpu_mutex_acquire(&sched->status_lock);

	seq_puts(s, "active_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);

	seq_puts(s, "recent_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);

	nvgpu_mutex_release(&sched->status_lock);

	gk20a_idle(g);

	return 0;
}
60
61static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
62{
63 return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
64}
65
66static const struct file_operations gk20a_sched_debugfs_fops = {
67 .open = gk20a_sched_debugfs_open,
68 .read = seq_read,
69 .llseek = seq_lseek,
70 .release = single_release,
71};
72
/*
 * Register the read-only "sched_ctrl" debugfs node under the platform
 * debugfs directory.
 */
void gk20a_sched_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
			g, &gk20a_sched_debugfs_fops);
}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
new file mode 100644
index 00000000..34a8f55f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_SCHED_H__
16#define __NVGPU_DEBUG_SCHED_H__
17
18struct gk20a;
19void gk20a_sched_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 80e7698b..f85016d4 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -21,6 +21,7 @@
21#include <nvgpu/soc.h> 21#include <nvgpu/soc.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h> 23#include <nvgpu/enabled.h>
24#include <nvgpu/debug.h>
24 25
25#include "gk20a/gk20a_scale.h" 26#include "gk20a/gk20a_scale.h"
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
@@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g,
182 nvgpu_init_mm_vars(g); 183 nvgpu_init_mm_vars(g);
183 184
184 gk20a_create_sysfs(g->dev); 185 gk20a_create_sysfs(g->dev);
185 gk20a_debug_init(g->dev, debugfs_symlink); 186 gk20a_debug_init(g, debugfs_symlink);
186 187
187 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); 188 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
188 if (!g->dbg_regops_tmp_buf) { 189 if (!g->dbg_regops_tmp_buf) {
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 2502ff30..d81328f0 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -26,9 +26,9 @@
26#include <nvgpu/kmem.h> 26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/list.h> 28#include <nvgpu/list.h>
29#include <nvgpu/debug.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/debug_gk20a.h"
32#include "gk20a/ctxsw_trace_gk20a.h" 32#include "gk20a/ctxsw_trace_gk20a.h"
33#include "gk20a/dbg_gpu_gk20a.h" 33#include "gk20a/dbg_gpu_gk20a.h"
34#include "gk20a/fence_gk20a.h" 34#include "gk20a/fence_gk20a.h"
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
134 134
135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE 135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
136 136
137static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 137void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
138{ 138{
139 nvgpu_mutex_acquire(&tracker->lock); 139 nvgpu_mutex_acquire(&tracker->lock);
140} 140}
141 141
142static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 142void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
143{ 143{
144 nvgpu_mutex_release(&tracker->lock); 144 nvgpu_mutex_release(&tracker->lock);
145} 145}
146 146
147static void kmem_print_mem_alloc(struct gk20a *g, 147void kmem_print_mem_alloc(struct gk20a *g,
148 struct nvgpu_mem_alloc *alloc, 148 struct nvgpu_mem_alloc *alloc,
149 struct seq_file *s) 149 struct seq_file *s)
150{ 150{
151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES 151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
152 int i; 152 int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
231 alloc->stack_length = stack_trace.nr_entries; 231 alloc->stack_length = stack_trace.nr_entries;
232#endif 232#endif
233 233
234 lock_tracker(tracker); 234 nvgpu_lock_tracker(tracker);
235 tracker->bytes_alloced += size; 235 tracker->bytes_alloced += size;
236 tracker->bytes_alloced_real += real_size; 236 tracker->bytes_alloced_real += real_size;
237 tracker->nr_allocs++; 237 tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
246 if (ret) { 246 if (ret) {
247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr); 247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
248 kfree(alloc); 248 kfree(alloc);
249 unlock_tracker(tracker); 249 nvgpu_unlock_tracker(tracker);
250 return ret; 250 return ret;
251 } 251 }
252 unlock_tracker(tracker); 252 nvgpu_unlock_tracker(tracker);
253 253
254 return 0; 254 return 0;
255} 255}
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
259{ 259{
260 struct nvgpu_mem_alloc *alloc; 260 struct nvgpu_mem_alloc *alloc;
261 261
262 lock_tracker(tracker); 262 nvgpu_lock_tracker(tracker);
263 alloc = nvgpu_rem_alloc(tracker, addr); 263 alloc = nvgpu_rem_alloc(tracker, addr);
264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { 264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
265 unlock_tracker(tracker); 265 nvgpu_unlock_tracker(tracker);
266 return -EINVAL; 266 return -EINVAL;
267 } 267 }
268 268
269 tracker->nr_frees++; 269 tracker->nr_frees++;
270 tracker->bytes_freed += alloc->size; 270 tracker->bytes_freed += alloc->size;
271 tracker->bytes_freed_real += alloc->real_size; 271 tracker->bytes_freed_real += alloc->real_size;
272 unlock_tracker(tracker); 272 nvgpu_unlock_tracker(tracker);
273 273
274 return 0; 274 return 0;
275} 275}
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); 407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
408} 408}
409 409
410/**
411 * to_human_readable_bytes - Determine suffix for passed size.
412 *
413 * @bytes - Number of bytes to generate a suffix for.
414 * @hr_bytes [out] - The human readable number of bytes.
415 * @hr_suffix [out] - The suffix for the HR number of bytes.
416 *
417 * Computes a human readable decomposition of the passed number of bytes. The
418 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
419 * number of bytes is then passed back in @hr_bytes. This returns the following
420 * ranges:
421 *
422 * 0 - 1023 B
423 * 1 - 1023 KB
424 * 1 - 1023 MB
425 * 1 - 1023 GB
426 * 1 - 1023 TB
427 * 1 - ... PB
428 */
429static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
430 const char **hr_suffix)
431{
432 static const char *suffixes[] =
433 { "B", "KB", "MB", "GB", "TB", "PB" };
434
435 u64 suffix_ind = 0;
436
437 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
438 bytes >>= 10;
439 suffix_ind++;
440 }
441
442 /*
443 * Handle case where bytes > 1023PB.
444 */
445 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
446 suffix_ind : ARRAY_SIZE(suffixes) - 1;
447
448 *hr_bytes = bytes;
449 *hr_suffix = suffixes[suffix_ind];
450}
451
452/**
453 * print_hr_bytes - Print human readable bytes
454 *
455 * @s - A seq_file to print to. May be NULL.
456 * @msg - A message to print before the bytes.
457 * @bytes - Number of bytes.
458 *
459 * Print @msg followed by the human readable decomposition of the passed number
460 * of bytes.
461 *
462 * If @s is NULL then this prints will be made to the kernel log.
463 */
464static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
465{
466 u64 hr_bytes;
467 const char *hr_suffix;
468
469 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
470 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
471}
472
473/**
474 * print_histogram - Build a histogram of the memory usage.
475 *
476 * @tracker The tracking to pull data from.
477 * @s A seq_file to dump info into.
478 */
479static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
480 struct seq_file *s)
481{
482 int i;
483 u64 pot_min, pot_max;
484 u64 nr_buckets;
485 unsigned int *buckets;
486 unsigned int total_allocs;
487 struct nvgpu_rbtree_node *node;
488 static const char histogram_line[] =
489 "++++++++++++++++++++++++++++++++++++++++";
490
491 /*
492 * pot_min is essentially a round down to the nearest power of 2. This
493 * is the start of the histogram. pot_max is just a round up to the
494 * nearest power of two. Each histogram bucket is one power of two so
495 * the histogram buckets are exponential.
496 */
497 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
498 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
499
500 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
501
502 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
503 if (!buckets) {
504 __pstat(s, "OOM: could not allocate bucket storage!?\n");
505 return;
506 }
507
508 /*
509 * Iterate across all of the allocs and determine what bucket they
510 * should go in. Round the size down to the nearest power of two to
511 * find the right bucket.
512 */
513 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
514 while (node) {
515 int b;
516 u64 bucket_min;
517 struct nvgpu_mem_alloc *alloc =
518 nvgpu_mem_alloc_from_rbtree_node(node);
519
520 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
521 if (bucket_min < tracker->min_alloc)
522 bucket_min = tracker->min_alloc;
523
524 b = __ffs(bucket_min) - __ffs(pot_min);
525
526 /*
527 * Handle the one case were there's an alloc exactly as big as
528 * the maximum bucket size of the largest bucket. Most of the
529 * buckets have an inclusive minimum and exclusive maximum. But
530 * the largest bucket needs to have an _inclusive_ maximum as
531 * well.
532 */
533 if (b == (int)nr_buckets)
534 b--;
535
536 buckets[b]++;
537
538 nvgpu_rbtree_enum_next(&node, node);
539 }
540
541 total_allocs = 0;
542 for (i = 0; i < (int)nr_buckets; i++)
543 total_allocs += buckets[i];
544
545 __pstat(s, "Alloc histogram:\n");
546
547 /*
548 * Actually compute the histogram lines.
549 */
550 for (i = 0; i < (int)nr_buckets; i++) {
551 char this_line[sizeof(histogram_line) + 1];
552 u64 line_length;
553 u64 hr_bytes;
554 const char *hr_suffix;
555
556 memset(this_line, 0, sizeof(this_line));
557
558 /*
559 * Compute the normalized line length. Cant use floating point
560 * so we will just multiply everything by 1000 and use fixed
561 * point.
562 */
563 line_length = (1000 * buckets[i]) / total_allocs;
564 line_length *= sizeof(histogram_line);
565 line_length /= 1000;
566
567 memset(this_line, '+', line_length);
568
569 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
570 &hr_bytes, &hr_suffix);
571 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
572 hr_bytes, hr_bytes << 1,
573 hr_suffix, buckets[i], this_line);
574 }
575}
576
577#ifdef CONFIG_DEBUG_FS
578/**
579 * nvgpu_kmem_print_stats - Print kmem tracking stats.
580 *
581 * @tracker The tracking to pull data from.
582 * @s A seq_file to dump info into.
583 *
584 * Print stats from a tracker. If @s is non-null then seq_printf() will be
585 * used with @s. Otherwise the stats are pr_info()ed.
586 */
587void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
588 struct seq_file *s)
589{
590 lock_tracker(tracker);
591
592 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
593
594 __pstat(s, "Basic Stats:\n");
595 __pstat(s, " Number of allocs %lld\n",
596 tracker->nr_allocs);
597 __pstat(s, " Number of frees %lld\n",
598 tracker->nr_frees);
599 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
600 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
601 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
602 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
603 print_hr_bytes(s, " Bytes allocated (real) ",
604 tracker->bytes_alloced_real);
605 print_hr_bytes(s, " Bytes freed (real) ",
606 tracker->bytes_freed_real);
607 __pstat(s, "\n");
608
609 print_histogram(tracker, s);
610
611 unlock_tracker(tracker);
612}
613
614static int __kmem_tracking_show(struct seq_file *s, void *unused)
615{
616 struct nvgpu_mem_alloc_tracker *tracker = s->private;
617
618 nvgpu_kmem_print_stats(tracker, s);
619
620 return 0;
621}
622
623static int __kmem_tracking_open(struct inode *inode, struct file *file)
624{
625 return single_open(file, __kmem_tracking_show, inode->i_private);
626}
627
628static const struct file_operations __kmem_tracking_fops = {
629 .open = __kmem_tracking_open,
630 .read = seq_read,
631 .llseek = seq_lseek,
632 .release = single_release,
633};
634
635static int __kmem_traces_dump_tracker(struct gk20a *g,
636 struct nvgpu_mem_alloc_tracker *tracker,
637 struct seq_file *s)
638{
639 struct nvgpu_rbtree_node *node;
640
641 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
642 while (node) {
643 struct nvgpu_mem_alloc *alloc =
644 nvgpu_mem_alloc_from_rbtree_node(node);
645
646 kmem_print_mem_alloc(g, alloc, s);
647
648 nvgpu_rbtree_enum_next(&node, node);
649 }
650
651 return 0;
652}
653
654static int __kmem_traces_show(struct seq_file *s, void *unused)
655{
656 struct gk20a *g = s->private;
657
658 lock_tracker(g->vmallocs);
659 seq_puts(s, "Oustanding vmallocs:\n");
660 __kmem_traces_dump_tracker(g, g->vmallocs, s);
661 seq_puts(s, "\n");
662 unlock_tracker(g->vmallocs);
663
664 lock_tracker(g->kmallocs);
665 seq_puts(s, "Oustanding kmallocs:\n");
666 __kmem_traces_dump_tracker(g, g->kmallocs, s);
667 unlock_tracker(g->kmallocs);
668
669 return 0;
670}
671
672static int __kmem_traces_open(struct inode *inode, struct file *file)
673{
674 return single_open(file, __kmem_traces_show, inode->i_private);
675}
676
677static const struct file_operations __kmem_traces_fops = {
678 .open = __kmem_traces_open,
679 .read = seq_read,
680 .llseek = seq_lseek,
681 .release = single_release,
682};
683
684void nvgpu_kmem_debugfs_init(struct device *dev)
685{
686 struct gk20a_platform *plat = dev_get_drvdata(dev);
687 struct gk20a *g = get_gk20a(dev);
688 struct dentry *gpu_root = plat->debugfs;
689 struct dentry *node;
690
691 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
692 if (IS_ERR_OR_NULL(g->debugfs_kmem))
693 return;
694
695 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
696 g->debugfs_kmem,
697 g->vmallocs, &__kmem_tracking_fops);
698 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
699 g->debugfs_kmem,
700 g->kmallocs, &__kmem_tracking_fops);
701 node = debugfs_create_file("traces", S_IRUGO,
702 g->debugfs_kmem,
703 g, &__kmem_traces_fops);
704}
705#else
706void nvgpu_kmem_debugfs_init(struct device *dev)
707{
708}
709#endif
710
711static int __do_check_for_outstanding_allocs( 410static int __do_check_for_outstanding_allocs(
712 struct gk20a *g, 411 struct gk20a *g,
713 struct nvgpu_mem_alloc_tracker *tracker, 412 struct nvgpu_mem_alloc_tracker *tracker,
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
index d3abb378..a41762af 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -20,6 +20,8 @@
20#include <nvgpu/rbtree.h> 20#include <nvgpu/rbtree.h>
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22 22
23struct seq_file;
24
23#define __pstat(s, fmt, msg...) \ 25#define __pstat(s, fmt, msg...) \
24 do { \ 26 do { \
25 if (s) \ 27 if (s) \
@@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker {
92 unsigned long max_alloc; 94 unsigned long max_alloc;
93}; 95};
94 96
97void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
98void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
99
100void kmem_print_mem_alloc(struct gk20a *g,
101 struct nvgpu_mem_alloc *alloc,
102 struct seq_file *s);
95#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ 103#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
96 104
97#endif /* __KMEM_PRIV_H__ */ 105#endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index d5fc40de..4f7fc3fa 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -29,6 +29,7 @@
29#include <nvgpu/nvgpu_common.h> 29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/soc.h> 30#include <nvgpu/soc.h>
31#include <nvgpu/enabled.h> 31#include <nvgpu/enabled.h>
32#include <nvgpu/debug.h>
32 33
33#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
34#include "gk20a/platform_gk20a.h" 35#include "gk20a/platform_gk20a.h"
@@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
970 971
971 gk20a_user_deinit(dev, &nvgpu_class); 972 gk20a_user_deinit(dev, &nvgpu_class);
972 973
973#ifdef CONFIG_DEBUG_FS 974 gk20a_debug_deinit(g);
974 debugfs_remove_recursive(platform->debugfs);
975 debugfs_remove_recursive(platform->debugfs_alias);
976#endif
977 975
978 gk20a_remove_sysfs(dev); 976 gk20a_remove_sysfs(dev);
979 977
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 40ee199a..eae0475a 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
411 wmb(); 411 wmb();
412 a->inited = true; 412 a->inited = true;
413 413
414#ifdef CONFIG_DEBUG_FS
414 nvgpu_init_alloc_debug(g, __a); 415 nvgpu_init_alloc_debug(g, __a);
416#endif
415 alloc_dbg(__a, "New allocator: type bitmap\n"); 417 alloc_dbg(__a, "New allocator: type bitmap\n");
416 alloc_dbg(__a, " base 0x%llx\n", a->base); 418 alloc_dbg(__a, " base 0x%llx\n", a->base);
417 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); 419 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 34bc51df..0ef94c10 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
251 251
252 alloc_lock(__a); 252 alloc_lock(__a);
253 253
254#ifdef CONFIG_DEBUG_FS
254 nvgpu_fini_alloc_debug(__a); 255 nvgpu_fini_alloc_debug(__a);
256#endif
255 257
256 /* 258 /*
257 * Free the fixed allocs first. 259 * Free the fixed allocs first.
@@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1290 wmb(); 1292 wmb();
1291 a->initialized = 1; 1293 a->initialized = 1;
1292 1294
1295#ifdef CONFIG_DEBUG_FS
1293 nvgpu_init_alloc_debug(g, __a); 1296 nvgpu_init_alloc_debug(g, __a);
1297#endif
1294 alloc_dbg(__a, "New allocator: type buddy\n"); 1298 alloc_dbg(__a, "New allocator: type buddy\n");
1295 alloc_dbg(__a, " base 0x%llx\n", a->base); 1299 alloc_dbg(__a, " base 0x%llx\n", a->base);
1296 alloc_dbg(__a, " size 0x%llx\n", a->length); 1300 alloc_dbg(__a, " size 0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 234ae4a3..944b4b0f 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
99{ 99{
100 struct nvgpu_lockless_allocator *pa = a->priv; 100 struct nvgpu_lockless_allocator *pa = a->priv;
101 101
102#ifdef CONFIG_DEBUG_FS
102 nvgpu_fini_alloc_debug(a); 103 nvgpu_fini_alloc_debug(a);
104#endif
103 105
104 nvgpu_vfree(a->g, pa->next); 106 nvgpu_vfree(a->g, pa->next);
105 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa); 107 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
@@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
191 wmb(); 193 wmb();
192 a->inited = true; 194 a->inited = true;
193 195
196#ifdef CONFIG_DEBUG_FS
194 nvgpu_init_alloc_debug(g, __a); 197 nvgpu_init_alloc_debug(g, __a);
198#endif
195 alloc_dbg(__a, "New allocator: type lockless\n"); 199 alloc_dbg(__a, "New allocator: type lockless\n");
196 alloc_dbg(__a, " base 0x%llx\n", a->base); 200 alloc_dbg(__a, " base 0x%llx\n", a->base);
197 alloc_dbg(__a, " nodes %d\n", a->nr_nodes); 201 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index 211b353b..1646d2b1 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -20,11 +20,6 @@
20 20
21#include "gk20a/gk20a.h" 21#include "gk20a/gk20a.h"
22#include "gk20a/mm_gk20a.h" 22#include "gk20a/mm_gk20a.h"
23#ifdef CONFIG_DEBUG_FS
24#include "gk20a/platform_gk20a.h"
25#endif
26
27u32 nvgpu_alloc_tracing_on;
28 23
29u64 nvgpu_alloc_length(struct nvgpu_allocator *a) 24u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
30{ 25{
@@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
151 146
152 return 0; 147 return 0;
153} 148}
154
155#ifdef CONFIG_DEBUG_FS
156void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
157 struct seq_file *s, int lock)
158{
159 __a->ops->print_stats(__a, s, lock);
160}
161
162static int __alloc_show(struct seq_file *s, void *unused)
163{
164 struct nvgpu_allocator *a = s->private;
165
166 nvgpu_alloc_print_stats(a, s, 1);
167
168 return 0;
169}
170
171static int __alloc_open(struct inode *inode, struct file *file)
172{
173 return single_open(file, __alloc_show, inode->i_private);
174}
175
176static const struct file_operations __alloc_fops = {
177 .open = __alloc_open,
178 .read = seq_read,
179 .llseek = seq_lseek,
180 .release = single_release,
181};
182#endif
183
184void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
185{
186#ifdef CONFIG_DEBUG_FS
187 if (!g->debugfs_allocators)
188 return;
189
190 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
191 g->debugfs_allocators,
192 a, &__alloc_fops);
193#endif
194}
195
196void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
197{
198#ifdef CONFIG_DEBUG_FS
199 if (!IS_ERR_OR_NULL(a->debugfs_entry))
200 debugfs_remove(a->debugfs_entry);
201#endif
202}
203
204#ifdef CONFIG_DEBUG_FS
205void nvgpu_alloc_debugfs_init(struct device *dev)
206{
207 struct gk20a_platform *platform = dev_get_drvdata(dev);
208 struct dentry *gpu_root = platform->debugfs;
209 struct gk20a *g = get_gk20a(dev);
210
211 g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
212 if (IS_ERR_OR_NULL(g->debugfs_allocators))
213 return;
214
215 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
216 &nvgpu_alloc_tracing_on);
217}
218#endif
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 14b5da3c..3f4f3706 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
916 if (err) 916 if (err)
917 goto fail; 917 goto fail;
918 918
919#ifdef CONFIG_DEBUG_FS
919 nvgpu_init_alloc_debug(g, __a); 920 nvgpu_init_alloc_debug(g, __a);
921#endif
920 palloc_dbg(a, "New allocator: type page\n"); 922 palloc_dbg(a, "New allocator: type page\n");
921 palloc_dbg(a, " base 0x%llx\n", a->base); 923 palloc_dbg(a, " base 0x%llx\n", a->base);
922 palloc_dbg(a, " size 0x%llx\n", a->length); 924 palloc_dbg(a, " size 0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index a0160274..084f1793 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -18,9 +18,6 @@
18 18
19#include <linux/dma-mapping.h> 19#include <linux/dma-mapping.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#ifdef CONFIG_DEBUG_FS
22#include <linux/debugfs.h>
23#endif
24#include <linux/dma-buf.h> 21#include <linux/dma-buf.h>
25 22
26#include <trace/events/gk20a.h> 23#include <trace/events/gk20a.h>
@@ -40,8 +37,6 @@
40#include "cde_gk20a.h" 37#include "cde_gk20a.h"
41#include "fence_gk20a.h" 38#include "fence_gk20a.h"
42#include "gr_gk20a.h" 39#include "gr_gk20a.h"
43#include "debug_gk20a.h"
44#include "platform_gk20a.h"
45 40
46#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> 41#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
47#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> 42#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -1585,8 +1580,7 @@ int gk20a_prepare_compressible_read(
1585 if (IS_ERR(dmabuf)) 1580 if (IS_ERR(dmabuf))
1586 return -EINVAL; 1581 return -EINVAL;
1587 1582
1588 err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), 1583 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1589 offset, &state);
1590 if (err) { 1584 if (err) {
1591 dma_buf_put(dmabuf); 1585 dma_buf_put(dmabuf);
1592 return err; 1586 return err;
@@ -1650,7 +1644,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1650 return -EINVAL; 1644 return -EINVAL;
1651 } 1645 }
1652 1646
1653 err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state); 1647 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1654 if (err) { 1648 if (err) {
1655 nvgpu_err(g, "could not get state from dmabuf"); 1649 nvgpu_err(g, "could not get state from dmabuf");
1656 dma_buf_put(dmabuf); 1650 dma_buf_put(dmabuf);
@@ -1671,38 +1665,3 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1671 dma_buf_put(dmabuf); 1665 dma_buf_put(dmabuf);
1672 return 0; 1666 return 0;
1673} 1667}
1674
1675#ifdef CONFIG_DEBUG_FS
1676static ssize_t gk20a_cde_reload_write(struct file *file,
1677 const char __user *userbuf, size_t count, loff_t *ppos)
1678{
1679 struct gk20a *g = file->private_data;
1680 gk20a_cde_reload(g);
1681 return count;
1682}
1683
1684static const struct file_operations gk20a_cde_reload_fops = {
1685 .open = simple_open,
1686 .write = gk20a_cde_reload_write,
1687};
1688
1689void gk20a_cde_debugfs_init(struct device *dev)
1690{
1691 struct gk20a_platform *platform = dev_get_drvdata(dev);
1692 struct gk20a *g = get_gk20a(dev);
1693
1694 if (!platform->has_cde)
1695 return;
1696
1697 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
1698 platform->debugfs, &g->cde_app.shader_parameter);
1699 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
1700 platform->debugfs, &g->cde_app.ctx_count);
1701 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
1702 platform->debugfs, &g->cde_app.ctx_usecount);
1703 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
1704 platform->debugfs, &g->cde_app.ctx_count_top);
1705 debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
1706 g, &gk20a_cde_reload_fops);
1707}
1708#endif
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index ffd55b4d..4f400bf3 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -295,7 +295,6 @@ int gk20a_cde_convert(struct gk20a *g,
295 struct nvgpu_fence *fence, 295 struct nvgpu_fence *fence,
296 u32 __flags, struct gk20a_cde_param *params, 296 u32 __flags, struct gk20a_cde_param *params,
297 int num_params, struct gk20a_fence **fence_out); 297 int num_params, struct gk20a_fence **fence_out);
298void gk20a_cde_debugfs_init(struct device *dev);
299 298
300int gk20a_prepare_compressible_read( 299int gk20a_prepare_compressible_read(
301 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, 300 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 1ed90b14..c905bedb 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -13,15 +13,10 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#ifdef CONFIG_DEBUG_FS
17#include <linux/debugfs.h>
18#endif
19
20#include <nvgpu/kmem.h> 16#include <nvgpu/kmem.h>
21#include <nvgpu/dma.h> 17#include <nvgpu/dma.h>
22 18
23#include "gk20a.h" 19#include "gk20a.h"
24#include "debug_gk20a.h"
25 20
26#include <nvgpu/log.h> 21#include <nvgpu/log.h>
27 22
@@ -33,10 +28,6 @@
33#include <nvgpu/hw/gk20a/hw_mc_gk20a.h> 28#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 29#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
35 30
36#ifdef CONFIG_DEBUG_FS
37#include "platform_gk20a.h"
38#endif
39
40static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) 31static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
41{ 32{
42 gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n"); 33 gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
@@ -728,18 +719,3 @@ void gk20a_ce_delete_context_priv(struct gk20a *g,
728 return; 719 return;
729} 720}
730EXPORT_SYMBOL(gk20a_ce_delete_context); 721EXPORT_SYMBOL(gk20a_ce_delete_context);
731
732#ifdef CONFIG_DEBUG_FS
733void gk20a_ce_debugfs_init(struct device *dev)
734{
735 struct gk20a_platform *platform = dev_get_drvdata(dev);
736 struct gk20a *g = get_gk20a(dev);
737
738 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
739 platform->debugfs, &g->ce_app.ctx_count);
740 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
741 platform->debugfs, &g->ce_app.app_state);
742 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
743 platform->debugfs, &g->ce_app.next_ctx_id);
744}
745#endif
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index dfd19019..f972e175 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -156,10 +156,4 @@ void gk20a_ce_delete_context_priv(struct gk20a *g,
156void gk20a_ce_delete_context(struct gk20a *g, 156void gk20a_ce_delete_context(struct gk20a *g,
157 u32 ce_ctx_id); 157 u32 ce_ctx_id);
158 158
159
160#ifdef CONFIG_DEBUG_FS
161/* CE app debugfs api */
162void gk20a_ce_debugfs_init(struct device *dev);
163#endif
164
165#endif /*__CE2_GK20A_H__*/ 159#endif /*__CE2_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 571570d8..13abed95 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -30,9 +30,9 @@
30#include <nvgpu/circ_buf.h> 30#include <nvgpu/circ_buf.h>
31#include <nvgpu/cond.h> 31#include <nvgpu/cond.h>
32#include <nvgpu/enabled.h> 32#include <nvgpu/enabled.h>
33#include <nvgpu/debug.h>
33 34
34#include "gk20a.h" 35#include "gk20a.h"
35#include "debug_gk20a.h"
36#include "ctxsw_trace_gk20a.h" 36#include "ctxsw_trace_gk20a.h"
37#include "dbg_gpu_gk20a.h" 37#include "dbg_gpu_gk20a.h"
38#include "fence_gk20a.h" 38#include "fence_gk20a.h"
@@ -1403,6 +1403,7 @@ static u32 get_gp_free_count(struct channel_gk20a *c)
1403 return gp_free_count(c); 1403 return gp_free_count(c);
1404} 1404}
1405 1405
1406#ifdef CONFIG_DEBUG_FS
1406static void trace_write_pushbuffer(struct channel_gk20a *c, 1407static void trace_write_pushbuffer(struct channel_gk20a *c,
1407 struct nvgpu_gpfifo *g) 1408 struct nvgpu_gpfifo *g)
1408{ 1409{
@@ -1439,6 +1440,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
1439 dma_buf_vunmap(dmabuf, mem); 1440 dma_buf_vunmap(dmabuf, mem);
1440 } 1441 }
1441} 1442}
1443#endif
1442 1444
1443static void trace_write_pushbuffer_range(struct channel_gk20a *c, 1445static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1444 struct nvgpu_gpfifo *g, 1446 struct nvgpu_gpfifo *g,
@@ -1446,6 +1448,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1446 int offset, 1448 int offset,
1447 int count) 1449 int count)
1448{ 1450{
1451#ifdef CONFIG_DEBUG_FS
1449 u32 size; 1452 u32 size;
1450 int i; 1453 int i;
1451 struct nvgpu_gpfifo *gp; 1454 struct nvgpu_gpfifo *gp;
@@ -1478,6 +1481,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1478 1481
1479 if (gpfifo_allocated) 1482 if (gpfifo_allocated)
1480 nvgpu_big_free(c->g, g); 1483 nvgpu_big_free(c->g, g);
1484#endif
1481} 1485}
1482 1486
1483static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) 1487static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
@@ -1629,8 +1633,8 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
1629 nvgpu_err(g, "Job on channel %d timed out", 1633 nvgpu_err(g, "Job on channel %d timed out",
1630 ch->hw_chid); 1634 ch->hw_chid);
1631 1635
1632 gk20a_debug_dump(g->dev); 1636 gk20a_debug_dump(g);
1633 gk20a_gr_debug_dump(g->dev); 1637 gk20a_gr_debug_dump(g);
1634 1638
1635 g->ops.fifo.force_reset_ch(ch, 1639 g->ops.fifo.force_reset_ch(ch,
1636 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true); 1640 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index ac3a3d57..46560a56 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -29,12 +29,11 @@
29#include <nvgpu/log.h> 29#include <nvgpu/log.h>
30#include <nvgpu/soc.h> 30#include <nvgpu/soc.h>
31#include <nvgpu/atomic.h> 31#include <nvgpu/atomic.h>
32#include <nvgpu/sort.h>
33#include <nvgpu/bug.h> 32#include <nvgpu/bug.h>
34#include <nvgpu/log2.h> 33#include <nvgpu/log2.h>
34#include <nvgpu/debug.h>
35 35
36#include "gk20a.h" 36#include "gk20a.h"
37#include "debug_gk20a.h"
38#include "ctxsw_trace_gk20a.h" 37#include "ctxsw_trace_gk20a.h"
39#include "mm_gk20a.h" 38#include "mm_gk20a.h"
40 39
@@ -46,10 +45,6 @@
46#include <nvgpu/hw/gk20a/hw_mc_gk20a.h> 45#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
47#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 46#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
48 47
49#ifdef CONFIG_DEBUG_FS
50#include "platform_gk20a.h"
51#endif
52
53#define FECS_METHOD_WFI_RESTORE 0x80000 48#define FECS_METHOD_WFI_RESTORE 0x80000
54 49
55static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, 50static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -57,10 +52,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
57 bool wait_for_finish); 52 bool wait_for_finish);
58static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); 53static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
59 54
60#ifdef CONFIG_DEBUG_FS
61static void __gk20a_fifo_profile_free(struct kref *ref);
62#endif
63
64u32 gk20a_fifo_get_engine_ids(struct gk20a *g, 55u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
65 u32 engine_id[], u32 engine_id_sz, 56 u32 engine_id[], u32 engine_id_sz,
66 u32 engine_enum) 57 u32 engine_enum)
@@ -562,14 +553,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
562 f->engine_info = NULL; 553 f->engine_info = NULL;
563 nvgpu_kfree(g, f->active_engines_list); 554 nvgpu_kfree(g, f->active_engines_list);
564 f->active_engines_list = NULL; 555 f->active_engines_list = NULL;
565#ifdef CONFIG_DEBUG_FS
566 nvgpu_mutex_acquire(&f->profile.lock);
567 if (f->profile.enabled) {
568 f->profile.enabled = false;
569 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
570 }
571 nvgpu_mutex_release(&f->profile.lock);
572#endif
573} 556}
574 557
575/* reads info from hardware and fills in pbmda exception info record */ 558/* reads info from hardware and fills in pbmda exception info record */
@@ -1543,7 +1526,7 @@ static bool gk20a_fifo_handle_mmu_fault(
1543 } else { 1526 } else {
1544 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); 1527 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
1545 fake_fault = false; 1528 fake_fault = false;
1546 gk20a_debug_dump(g->dev); 1529 gk20a_debug_dump(g);
1547 } 1530 }
1548 1531
1549 1532
@@ -1833,7 +1816,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1833 gk20a_channel_abort(ch, false); 1816 gk20a_channel_abort(ch, false);
1834 1817
1835 if (gk20a_fifo_error_ch(g, ch)) 1818 if (gk20a_fifo_error_ch(g, ch))
1836 gk20a_debug_dump(g->dev); 1819 gk20a_debug_dump(g);
1837 1820
1838 gk20a_channel_put(ch); 1821 gk20a_channel_put(ch);
1839 } 1822 }
@@ -1860,7 +1843,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1860 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; 1843 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
1861 1844
1862 if (gk20a_fifo_error_tsg(g, tsg)) 1845 if (gk20a_fifo_error_tsg(g, tsg))
1863 gk20a_debug_dump(g->dev); 1846 gk20a_debug_dump(g);
1864 1847
1865 gk20a_fifo_abort_tsg(g, tsgid, false); 1848 gk20a_fifo_abort_tsg(g, tsgid, false);
1866 } 1849 }
@@ -1957,7 +1940,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1957 unsigned int id_type; 1940 unsigned int id_type;
1958 1941
1959 if (verbose) 1942 if (verbose)
1960 gk20a_debug_dump(g->dev); 1943 gk20a_debug_dump(g);
1961 1944
1962 if (g->ops.ltc.flush) 1945 if (g->ops.ltc.flush)
1963 g->ops.ltc.flush(g); 1946 g->ops.ltc.flush(g);
@@ -3441,345 +3424,6 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
3441 return NULL; 3424 return NULL;
3442} 3425}
3443 3426
3444#ifdef CONFIG_DEBUG_FS
3445
3446/* Get the next element in the ring buffer of profile entries
3447 * and grab a reference to the structure
3448 */
3449struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
3450{
3451 struct fifo_gk20a *f = &g->fifo;
3452 struct fifo_profile_gk20a *profile;
3453 unsigned int index;
3454
3455 /* If kref is zero, profiling is not enabled */
3456 if (!kref_get_unless_zero(&f->profile.ref))
3457 return NULL;
3458 index = atomic_inc_return(&f->profile.get);
3459 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
3460
3461 return profile;
3462}
3463
3464/* Free the reference to the structure. This allows deferred cleanups */
3465void gk20a_fifo_profile_release(struct gk20a *g,
3466 struct fifo_profile_gk20a *profile)
3467{
3468 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
3469}
3470
3471static void *gk20a_fifo_sched_debugfs_seq_start(
3472 struct seq_file *s, loff_t *pos)
3473{
3474 struct gk20a *g = s->private;
3475 struct fifo_gk20a *f = &g->fifo;
3476
3477 if (*pos >= f->num_channels)
3478 return NULL;
3479
3480 return &f->channel[*pos];
3481}
3482
3483static void *gk20a_fifo_sched_debugfs_seq_next(
3484 struct seq_file *s, void *v, loff_t *pos)
3485{
3486 struct gk20a *g = s->private;
3487 struct fifo_gk20a *f = &g->fifo;
3488
3489 ++(*pos);
3490 if (*pos >= f->num_channels)
3491 return NULL;
3492
3493 return &f->channel[*pos];
3494}
3495
3496static void gk20a_fifo_sched_debugfs_seq_stop(
3497 struct seq_file *s, void *v)
3498{
3499}
3500
3501static int gk20a_fifo_sched_debugfs_seq_show(
3502 struct seq_file *s, void *v)
3503{
3504 struct gk20a *g = s->private;
3505 struct fifo_gk20a *f = &g->fifo;
3506 struct channel_gk20a *ch = v;
3507 struct tsg_gk20a *tsg = NULL;
3508
3509 struct fifo_engine_info_gk20a *engine_info;
3510 struct fifo_runlist_info_gk20a *runlist;
3511 u32 runlist_id;
3512 int ret = SEQ_SKIP;
3513 u32 engine_id;
3514
3515 engine_id = gk20a_fifo_get_gr_engine_id(g);
3516 engine_info = (f->engine_info + engine_id);
3517 runlist_id = engine_info->runlist_id;
3518 runlist = &f->runlist_info[runlist_id];
3519
3520 if (ch == f->channel) {
3521 seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
3522 seq_puts(s, " (usecs) (msecs)\n");
3523 ret = 0;
3524 }
3525
3526 if (!test_bit(ch->hw_chid, runlist->active_channels))
3527 return ret;
3528
3529 if (gk20a_channel_get(ch)) {
3530 if (gk20a_is_channel_marked_as_tsg(ch))
3531 tsg = &f->tsg[ch->tsgid];
3532
3533 seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
3534 ch->hw_chid,
3535 ch->tsgid,
3536 ch->tgid,
3537 tsg ? tsg->timeslice_us : ch->timeslice_us,
3538 ch->timeout_ms_max,
3539 tsg ? tsg->interleave_level : ch->interleave_level,
3540 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
3541 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
3542 gk20a_channel_put(ch);
3543 }
3544 return 0;
3545}
3546
3547static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
3548 .start = gk20a_fifo_sched_debugfs_seq_start,
3549 .next = gk20a_fifo_sched_debugfs_seq_next,
3550 .stop = gk20a_fifo_sched_debugfs_seq_stop,
3551 .show = gk20a_fifo_sched_debugfs_seq_show
3552};
3553
3554static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
3555 struct file *file)
3556{
3557 int err;
3558
3559 if (!capable(CAP_SYS_ADMIN))
3560 return -EPERM;
3561
3562 err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
3563 if (err)
3564 return err;
3565
3566 gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
3567
3568 ((struct seq_file *)file->private_data)->private = inode->i_private;
3569 return 0;
3570};
3571
3572/*
3573 * The file operations structure contains our open function along with
3574 * set of the canned seq_ ops.
3575 */
3576static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
3577 .owner = THIS_MODULE,
3578 .open = gk20a_fifo_sched_debugfs_open,
3579 .read = seq_read,
3580 .llseek = seq_lseek,
3581 .release = seq_release
3582};
3583
3584static void __gk20a_fifo_profile_free(struct kref *ref)
3585{
3586 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
3587 profile.ref);
3588 nvgpu_vfree(f->g, f->profile.data);
3589 nvgpu_vfree(f->g, f->profile.sorted);
3590}
3591
3592static int gk20a_fifo_profile_enable(void *data, u64 val)
3593{
3594 struct gk20a *g = (struct gk20a *) data;
3595 struct fifo_gk20a *f = &g->fifo;
3596
3597
3598 nvgpu_mutex_acquire(&f->profile.lock);
3599 if (val == 0) {
3600 if (f->profile.enabled) {
3601 f->profile.enabled = false;
3602 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
3603 }
3604 } else {
3605 if (!f->profile.enabled) {
3606 /* not kref init as it can have a running condition if
3607 * we enable/disable/enable while kickoff is happening
3608 */
3609 if (!kref_get_unless_zero(&f->profile.ref)) {
3610 f->profile.data = vzalloc(
3611 FIFO_PROFILING_ENTRIES *
3612 sizeof(struct fifo_profile_gk20a));
3613 f->profile.sorted = vzalloc(
3614 FIFO_PROFILING_ENTRIES *
3615 sizeof(u64));
3616 if (!(f->profile.data && f->profile.sorted)) {
3617 nvgpu_vfree(g, f->profile.data);
3618 nvgpu_vfree(g, f->profile.sorted);
3619 nvgpu_mutex_release(&f->profile.lock);
3620 return -ENOMEM;
3621 }
3622 kref_init(&f->profile.ref);
3623 }
3624 atomic_set(&f->profile.get, 0);
3625 f->profile.enabled = true;
3626 }
3627 }
3628 nvgpu_mutex_release(&f->profile.lock);
3629
3630 return 0;
3631}
3632
3633DEFINE_SIMPLE_ATTRIBUTE(
3634 gk20a_fifo_profile_enable_debugfs_fops,
3635 NULL,
3636 gk20a_fifo_profile_enable,
3637 "%llu\n"
3638);
3639
3640static int __profile_cmp(const void *a, const void *b)
3641{
3642 return *((unsigned long long *) a) - *((unsigned long long *) b);
3643}
3644
3645/*
3646 * This uses about 800b in the stack, but the function using it is not part
3647 * of a callstack where much memory is being used, so it is fine
3648 */
3649#define PERCENTILE_WIDTH 5
3650#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
3651
3652static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
3653 u64 *percentiles, u32 index_end, u32 index_start)
3654{
3655 unsigned int nelem = 0;
3656 unsigned int index;
3657 struct fifo_profile_gk20a *profile;
3658
3659 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
3660 profile = &g->fifo.profile.data[index];
3661
3662 if (profile->timestamp[index_end] >
3663 profile->timestamp[index_start]) {
3664 /* This is a valid element */
3665 g->fifo.profile.sorted[nelem] =
3666 profile->timestamp[index_end] -
3667 profile->timestamp[index_start];
3668 nelem++;
3669 }
3670 }
3671
3672 /* sort it */
3673 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
3674 __profile_cmp, NULL);
3675
3676 /* build ranges */
3677 for (index = 0; index < PERCENTILE_RANGES; index++)
3678 percentiles[index] =
3679 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
3680 nelem)/100 - 1];
3681 return nelem;
3682}
3683
3684static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
3685{
3686 struct gk20a *g = s->private;
3687 unsigned int get, nelem, index;
3688 /*
3689 * 800B in the stack, but function is declared statically and only
3690 * called from debugfs handler
3691 */
3692 u64 percentiles_ioctl[PERCENTILE_RANGES];
3693 u64 percentiles_kickoff[PERCENTILE_RANGES];
3694 u64 percentiles_jobtracking[PERCENTILE_RANGES];
3695 u64 percentiles_append[PERCENTILE_RANGES];
3696 u64 percentiles_userd[PERCENTILE_RANGES];
3697
3698 if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
3699 seq_printf(s, "Profiling disabled\n");
3700 return 0;
3701 }
3702
3703 get = atomic_read(&g->fifo.profile.get);
3704
3705 __gk20a_fifo_create_stats(g, percentiles_ioctl,
3706 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
3707 __gk20a_fifo_create_stats(g, percentiles_kickoff,
3708 PROFILE_END, PROFILE_ENTRY);
3709 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
3710 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
3711 __gk20a_fifo_create_stats(g, percentiles_append,
3712 PROFILE_APPEND, PROFILE_JOB_TRACKING);
3713 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
3714 PROFILE_END, PROFILE_APPEND);
3715
3716 seq_printf(s, "Number of kickoffs: %d\n", nelem);
3717 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
3718
3719 for (index = 0; index < PERCENTILE_RANGES; index++)
3720 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
3721 PERCENTILE_WIDTH * (index+1),
3722 percentiles_ioctl[index],
3723 percentiles_kickoff[index],
3724 percentiles_append[index],
3725 percentiles_jobtracking[index],
3726 percentiles_userd[index]);
3727
3728 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
3729
3730 return 0;
3731}
3732
3733static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
3734{
3735 return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
3736}
3737
3738static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
3739 .open = gk20a_fifo_profile_stats_open,
3740 .read = seq_read,
3741 .llseek = seq_lseek,
3742 .release = single_release,
3743};
3744
3745
3746void gk20a_fifo_debugfs_init(struct device *dev)
3747{
3748 struct gk20a_platform *platform = dev_get_drvdata(dev);
3749 struct gk20a *g = get_gk20a(dev);
3750
3751 struct dentry *gpu_root = platform->debugfs;
3752 struct dentry *fifo_root;
3753 struct dentry *profile_root;
3754
3755
3756 fifo_root = debugfs_create_dir("fifo", gpu_root);
3757 if (IS_ERR_OR_NULL(fifo_root))
3758 return;
3759
3760 gk20a_dbg(gpu_dbg_info, "g=%p", g);
3761
3762 debugfs_create_file("sched", 0600, fifo_root, g,
3763 &gk20a_fifo_sched_debugfs_fops);
3764
3765 profile_root = debugfs_create_dir("profile", fifo_root);
3766 if (IS_ERR_OR_NULL(profile_root))
3767 return;
3768
3769 nvgpu_mutex_init(&g->fifo.profile.lock);
3770 g->fifo.profile.enabled = false;
3771 atomic_set(&g->fifo.profile.get, 0);
3772 atomic_set(&g->fifo.profile.ref.refcount, 0);
3773
3774 debugfs_create_file("enable", 0600, profile_root, g,
3775 &gk20a_fifo_profile_enable_debugfs_fops);
3776
3777 debugfs_create_file("stats", 0600, profile_root, g,
3778 &gk20a_fifo_profile_stats_debugfs_fops);
3779
3780}
3781#endif /* CONFIG_DEBUG_FS */
3782
3783static const char * const ccsr_chan_status_str[] = { 3427static const char * const ccsr_chan_status_str[] = {
3784 "idle", 3428 "idle",
3785 "pending", 3429 "pending",
@@ -3901,6 +3545,54 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
3901 gk20a_debug_output(o, "\n"); 3545 gk20a_debug_output(o, "\n");
3902} 3546}
3903 3547
3548void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
3549 struct gk20a_debug_output *o)
3550{
3551 struct fifo_gk20a *f = &g->fifo;
3552 u32 chid;
3553 struct ch_state **ch_state;
3554
3555 ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
3556 if (!ch_state) {
3557 gk20a_debug_output(o, "cannot alloc memory for channels\n");
3558 return;
3559 }
3560
3561 for (chid = 0; chid < f->num_channels; chid++) {
3562 struct channel_gk20a *ch = &f->channel[chid];
3563 if (gk20a_channel_get(ch)) {
3564 ch_state[chid] =
3565 nvgpu_kmalloc(g, sizeof(struct ch_state) +
3566 ram_in_alloc_size_v());
3567 /* ref taken stays to below loop with
3568 * successful allocs */
3569 if (!ch_state[chid])
3570 gk20a_channel_put(ch);
3571 }
3572 }
3573
3574 for (chid = 0; chid < f->num_channels; chid++) {
3575 struct channel_gk20a *ch = &f->channel[chid];
3576 if (!ch_state[chid])
3577 continue;
3578
3579 ch_state[chid]->pid = ch->pid;
3580 ch_state[chid]->refs = atomic_read(&ch->ref_count);
3581 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
3582 &ch_state[chid]->inst_block[0],
3583 ram_in_alloc_size_v());
3584 gk20a_channel_put(ch);
3585 }
3586 for (chid = 0; chid < f->num_channels; chid++) {
3587 if (ch_state[chid]) {
3588 g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
3589 ch_state[chid]);
3590 nvgpu_kfree(g, ch_state[chid]);
3591 }
3592 }
3593 nvgpu_kfree(g, ch_state);
3594}
3595
3904void gk20a_dump_pbdma_status(struct gk20a *g, 3596void gk20a_dump_pbdma_status(struct gk20a *g,
3905 struct gk20a_debug_output *o) 3597 struct gk20a_debug_output *o)
3906{ 3598{
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 6c8868a2..228e5130 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -23,10 +23,11 @@
23 23
24#include "channel_gk20a.h" 24#include "channel_gk20a.h"
25#include "tsg_gk20a.h" 25#include "tsg_gk20a.h"
26#include "debug_gk20a.h"
27 26
28#include <nvgpu/kref.h> 27#include <nvgpu/kref.h>
29 28
29struct gk20a_debug_output;
30
30#define MAX_RUNLIST_BUFFERS 2 31#define MAX_RUNLIST_BUFFERS 2
31 32
32#define FIFO_INVAL_ENGINE_ID ((u32)~0) 33#define FIFO_INVAL_ENGINE_ID ((u32)~0)
@@ -287,8 +288,6 @@ int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
287int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice); 288int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
288 289
289 290
290void gk20a_fifo_debugfs_init(struct device *dev);
291
292const char *gk20a_fifo_interleave_level_name(u32 interleave_level); 291const char *gk20a_fifo_interleave_level_name(u32 interleave_level);
293 292
294int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type, 293int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
@@ -341,6 +340,8 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
341 struct gk20a_debug_output *o, 340 struct gk20a_debug_output *o,
342 u32 hw_chid, 341 u32 hw_chid,
343 struct ch_state *ch_state); 342 struct ch_state *ch_state);
343void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
344 struct gk20a_debug_output *o);
344void gk20a_dump_pbdma_status(struct gk20a *g, 345void gk20a_dump_pbdma_status(struct gk20a *g,
345 struct gk20a_debug_output *o); 346 struct gk20a_debug_output *o);
346void gk20a_dump_eng_status(struct gk20a *g, 347void gk20a_dump_eng_status(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 689fafb1..899c1d6a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -30,6 +30,7 @@ struct acr_desc;
30struct nvgpu_mem_alloc_tracker; 30struct nvgpu_mem_alloc_tracker;
31struct dbg_profiler_object_data; 31struct dbg_profiler_object_data;
32struct ecc_gk20a; 32struct ecc_gk20a;
33struct gk20a_debug_output;
33 34
34#include <linux/sched.h> 35#include <linux/sched.h>
35#include <nvgpu/lock.h> 36#include <nvgpu/lock.h>
@@ -61,7 +62,6 @@ struct ecc_gk20a;
61#include "therm_gk20a.h" 62#include "therm_gk20a.h"
62#include "gm20b/acr_gm20b.h" 63#include "gm20b/acr_gm20b.h"
63#include "cde_gk20a.h" 64#include "cde_gk20a.h"
64#include "debug_gk20a.h"
65#include "sched_gk20a.h" 65#include "sched_gk20a.h"
66#ifdef CONFIG_ARCH_TEGRA_18x_SOC 66#ifdef CONFIG_ARCH_TEGRA_18x_SOC
67#include "clk/clk.h" 67#include "clk/clk.h"
@@ -1544,10 +1544,6 @@ void nvgpu_wait_for_deferred_interrupts(struct gk20a *g);
1544struct gk20a * __must_check gk20a_get(struct gk20a *g); 1544struct gk20a * __must_check gk20a_get(struct gk20a *g);
1545void gk20a_put(struct gk20a *g); 1545void gk20a_put(struct gk20a *g);
1546 1546
1547#ifdef CONFIG_DEBUG_FS
1548int gk20a_railgating_debugfs_init(struct device *dev);
1549#endif
1550
1551static inline bool gk20a_platform_has_syncpoints(struct gk20a *g) 1547static inline bool gk20a_platform_has_syncpoints(struct gk20a *g)
1552{ 1548{
1553#ifdef CONFIG_TEGRA_GK20A_NVHOST 1549#ifdef CONFIG_TEGRA_GK20A_NVHOST
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2188618c..982cfac8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -30,6 +30,7 @@
30#include <nvgpu/bug.h> 30#include <nvgpu/bug.h>
31#include <nvgpu/firmware.h> 31#include <nvgpu/firmware.h>
32#include <nvgpu/enabled.h> 32#include <nvgpu/enabled.h>
33#include <nvgpu/debug.h>
33 34
34#include "gk20a.h" 35#include "gk20a.h"
35#include "kind_gk20a.h" 36#include "kind_gk20a.h"
@@ -37,13 +38,8 @@
37#include "gr_pri_gk20a.h" 38#include "gr_pri_gk20a.h"
38#include "regops_gk20a.h" 39#include "regops_gk20a.h"
39#include "dbg_gpu_gk20a.h" 40#include "dbg_gpu_gk20a.h"
40#include "debug_gk20a.h"
41#include "ctxsw_trace_gk20a.h" 41#include "ctxsw_trace_gk20a.h"
42 42
43#ifdef CONFIG_DEBUG_FS
44#include "platform_gk20a.h"
45#endif
46
47#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> 43#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
48#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> 44#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
49#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> 45#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
@@ -514,7 +510,7 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
514 nvgpu_err(g, 510 nvgpu_err(g,
515 "timeout waiting on ucode response"); 511 "timeout waiting on ucode response");
516 gk20a_fecs_dump_falcon_stats(g); 512 gk20a_fecs_dump_falcon_stats(g);
517 gk20a_gr_debug_dump(g->dev); 513 gk20a_gr_debug_dump(g);
518 return -1; 514 return -1;
519 } else if (check == WAIT_UCODE_ERROR) { 515 } else if (check == WAIT_UCODE_ERROR) {
520 nvgpu_err(g, 516 nvgpu_err(g,
@@ -9032,20 +9028,6 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g,
9032 return 0; 9028 return 0;
9033} 9029}
9034 9030
9035#ifdef CONFIG_DEBUG_FS
9036int gr_gk20a_debugfs_init(struct gk20a *g)
9037{
9038 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
9039
9040 g->debugfs_gr_default_attrib_cb_size =
9041 debugfs_create_u32("gr_default_attrib_cb_size",
9042 S_IRUGO|S_IWUSR, platform->debugfs,
9043 &g->gr.attrib_cb_default_size);
9044
9045 return 0;
9046}
9047#endif
9048
9049static void gr_gk20a_init_cyclestats(struct gk20a *g) 9031static void gr_gk20a_init_cyclestats(struct gk20a *g)
9050{ 9032{
9051#if defined(CONFIG_GK20A_CYCLE_STATS) 9033#if defined(CONFIG_GK20A_CYCLE_STATS)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 79aeb42f..deb8ea9c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -653,7 +653,6 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
653void gr_gk20a_free_gr_ctx(struct gk20a *g, 653void gr_gk20a_free_gr_ctx(struct gk20a *g,
654 struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); 654 struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
655int gr_gk20a_halt_pipe(struct gk20a *g); 655int gr_gk20a_halt_pipe(struct gk20a *g);
656int gr_gk20a_debugfs_init(struct gk20a *g);
657 656
658#if defined(CONFIG_GK20A_CYCLE_STATS) 657#if defined(CONFIG_GK20A_CYCLE_STATS)
659int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ 658int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 8a3beb39..b19398a6 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -37,6 +37,7 @@
37#include "pramin_gk20a.h" 37#include "pramin_gk20a.h"
38#include "priv_ring_gk20a.h" 38#include "priv_ring_gk20a.h"
39 39
40#include <nvgpu/debug.h>
40#include <nvgpu/log.h> 41#include <nvgpu/log.h>
41#include <nvgpu/bug.h> 42#include <nvgpu/bug.h>
42 43
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 53d22a7d..08e2e9cc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2563,13 +2563,13 @@ priv_exist_or_err:
2563 return 0; 2563 return 0;
2564} 2564}
2565 2565
2566int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, 2566int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
2567 u64 offset, struct gk20a_buffer_state **state) 2567 u64 offset, struct gk20a_buffer_state **state)
2568{ 2568{
2569 int err = 0; 2569 int err = 0;
2570 struct gk20a_dmabuf_priv *priv; 2570 struct gk20a_dmabuf_priv *priv;
2571 struct gk20a_buffer_state *s; 2571 struct gk20a_buffer_state *s;
2572 struct gk20a *g = get_gk20a(dev); 2572 struct device *dev = g->dev;
2573 2573
2574 if (WARN_ON(offset >= (u64)dmabuf->size)) 2574 if (WARN_ON(offset >= (u64)dmabuf->size))
2575 return -EINVAL; 2575 return -EINVAL;
@@ -3123,18 +3123,6 @@ static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
3123 return true; 3123 return true;
3124} 3124}
3125 3125
3126#ifdef CONFIG_DEBUG_FS
3127void gk20a_mm_debugfs_init(struct device *dev)
3128{
3129 struct gk20a_platform *platform = dev_get_drvdata(dev);
3130 struct dentry *gpu_root = platform->debugfs;
3131 struct gk20a *g = gk20a_get_platform(dev)->g;
3132
3133 debugfs_create_bool("force_pramin", 0664, gpu_root,
3134 &g->mm.force_pramin);
3135}
3136#endif
3137
3138void gk20a_init_mm(struct gpu_ops *gops) 3126void gk20a_init_mm(struct gpu_ops *gops)
3139{ 3127{
3140 gops->mm.gmmu_map = gk20a_locked_gmmu_map; 3128 gops->mm.gmmu_map = gk20a_locked_gmmu_map;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 79b55371..5d90cbf6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -146,7 +146,6 @@ struct channel_gk20a;
146int gk20a_init_mm_support(struct gk20a *g); 146int gk20a_init_mm_support(struct gk20a *g);
147int gk20a_init_mm_setup_sw(struct gk20a *g); 147int gk20a_init_mm_setup_sw(struct gk20a *g);
148int gk20a_init_mm_setup_hw(struct gk20a *g); 148int gk20a_init_mm_setup_hw(struct gk20a *g);
149void gk20a_mm_debugfs_init(struct device *dev);
150void gk20a_init_mm_ce_context(struct gk20a *g); 149void gk20a_init_mm_ce_context(struct gk20a *g);
151 150
152int gk20a_mm_fb_flush(struct gk20a *g); 151int gk20a_mm_fb_flush(struct gk20a *g);
@@ -437,7 +436,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
437 436
438int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); 437int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
439 438
440int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev, 439int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
441 u64 offset, struct gk20a_buffer_state **state); 440 u64 offset, struct gk20a_buffer_state **state);
442 441
443int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry); 442int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index a9e03943..552d5d73 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -35,12 +35,6 @@
35#include "nvgpu_gpuid_t19x.h" 35#include "nvgpu_gpuid_t19x.h"
36#endif 36#endif
37 37
38#ifdef CONFIG_DEBUG_FS
39#include <linux/debugfs.h>
40#include <linux/uaccess.h>
41#include "platform_gk20a.h"
42#endif
43
44#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" 38#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin"
45 39
46#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000 40#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -49,7 +43,7 @@
49#define gk20a_dbg_pmu(fmt, arg...) \ 43#define gk20a_dbg_pmu(fmt, arg...) \
50 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) 44 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
51 45
52static int gk20a_pmu_get_pg_stats(struct gk20a *g, 46int gk20a_pmu_get_pg_stats(struct gk20a *g,
53 u32 pg_engine_id, 47 u32 pg_engine_id,
54 struct pmu_pg_stats_data *pg_stat_data); 48 struct pmu_pg_stats_data *pg_stat_data);
55static void ap_callback_init_and_enable_ctrl( 49static void ap_callback_init_and_enable_ctrl(
@@ -281,7 +275,7 @@ static void set_pmu_cmdline_args_falctracesize_v1(
281 pmu->args_v1.falc_trace_size = size; 275 pmu->args_v1.falc_trace_size = size;
282} 276}
283 277
284static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) 278bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
285{ 279{
286 u32 i = 0, j = strlen(strings); 280 u32 i = 0, j = strlen(strings);
287 for (; i < j; i++) { 281 for (; i < j; i++) {
@@ -326,7 +320,7 @@ static void printtrace(struct pmu_gk20a *pmu)
326 count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]); 320 count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]);
327 l = 0; 321 l = 0;
328 m = 0; 322 m = 0;
329 while (find_hex_in_string((trace+i+20+m), g, &k)) { 323 while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
330 if (k >= 40) 324 if (k >= 40)
331 break; 325 break;
332 strncpy(part_str, (trace+i+20+m), k); 326 strncpy(part_str, (trace+i+20+m), k);
@@ -4141,7 +4135,7 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
4141 nvgpu_err(g, "ZBC save timeout"); 4135 nvgpu_err(g, "ZBC save timeout");
4142} 4136}
4143 4137
4144static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) 4138int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
4145{ 4139{
4146 struct gk20a *g = gk20a_from_pmu(pmu); 4140 struct gk20a *g = gk20a_from_pmu(pmu);
4147 struct pmu_v *pv = &g->ops.pmu_ver; 4141 struct pmu_v *pv = &g->ops.pmu_ver;
@@ -4185,7 +4179,7 @@ static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
4185 return 0; 4179 return 0;
4186} 4180}
4187 4181
4188static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu) 4182int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
4189{ 4183{
4190 struct gk20a *g = gk20a_from_pmu(pmu); 4184 struct gk20a *g = gk20a_from_pmu(pmu);
4191 struct pmu_cmd cmd; 4185 struct pmu_cmd cmd;
@@ -4231,7 +4225,7 @@ static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
4231 4225
4232 /* restart sampling */ 4226 /* restart sampling */
4233 if (pmu->perfmon_sampling_enabled) 4227 if (pmu->perfmon_sampling_enabled)
4234 return pmu_perfmon_start_sampling(pmu); 4228 return nvgpu_pmu_perfmon_start_sampling(pmu);
4235 return 0; 4229 return 0;
4236} 4230}
4237 4231
@@ -5173,9 +5167,9 @@ int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
5173 gk20a_dbg_fn(""); 5167 gk20a_dbg_fn("");
5174 5168
5175 if (enable) 5169 if (enable)
5176 err = pmu_perfmon_start_sampling(pmu); 5170 err = nvgpu_pmu_perfmon_start_sampling(pmu);
5177 else 5171 else
5178 err = pmu_perfmon_stop_sampling(pmu); 5172 err = nvgpu_pmu_perfmon_stop_sampling(pmu);
5179 5173
5180 return err; 5174 return err;
5181} 5175}
@@ -5293,7 +5287,7 @@ void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
5293 pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us; 5287 pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us;
5294} 5288}
5295 5289
5296static int gk20a_pmu_get_pg_stats(struct gk20a *g, 5290int gk20a_pmu_get_pg_stats(struct gk20a *g,
5297 u32 pg_engine_id, 5291 u32 pg_engine_id,
5298 struct pmu_pg_stats_data *pg_stat_data) 5292 struct pmu_pg_stats_data *pg_stat_data)
5299{ 5293{
@@ -5463,466 +5457,3 @@ int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
5463 status = gk20a_pmu_ap_send_command(g, &ap_cmd, true); 5457 status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
5464 return status; 5458 return status;
5465} 5459}
5466
5467#ifdef CONFIG_DEBUG_FS
5468static int lpwr_debug_show(struct seq_file *s, void *data)
5469{
5470 struct gk20a *g = s->private;
5471
5472 if (g->ops.pmu.pmu_pg_engines_feature_list &&
5473 g->ops.pmu.pmu_pg_engines_feature_list(g,
5474 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
5475 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
5476 seq_printf(s, "PSTATE: %u\n"
5477 "RPPG Enabled: %u\n"
5478 "RPPG ref count: %u\n"
5479 "RPPG state: %u\n"
5480 "MSCG Enabled: %u\n"
5481 "MSCG pstate state: %u\n"
5482 "MSCG transition state: %u\n",
5483 g->ops.clk_arb.get_current_pstate(g),
5484 g->elpg_enabled, g->pmu.elpg_refcnt,
5485 g->pmu.elpg_stat, g->mscg_enabled,
5486 g->pmu.mscg_stat, g->pmu.mscg_transition_state);
5487
5488 } else
5489 seq_printf(s, "ELPG Enabled: %u\n"
5490 "ELPG ref count: %u\n"
5491 "ELPG state: %u\n",
5492 g->elpg_enabled, g->pmu.elpg_refcnt,
5493 g->pmu.elpg_stat);
5494
5495 return 0;
5496
5497}
5498
5499static int lpwr_debug_open(struct inode *inode, struct file *file)
5500{
5501 return single_open(file, lpwr_debug_show, inode->i_private);
5502}
5503
5504static const struct file_operations lpwr_debug_fops = {
5505 .open = lpwr_debug_open,
5506 .read = seq_read,
5507 .llseek = seq_lseek,
5508 .release = single_release,
5509};
5510
5511static int mscg_stat_show(struct seq_file *s, void *data)
5512{
5513 struct gk20a *g = s->private;
5514 u64 total_ingating, total_ungating, residency, divisor, dividend;
5515 struct pmu_pg_stats_data pg_stat_data = { 0 };
5516 int err;
5517
5518 /* Don't unnecessarily power on the device */
5519 if (g->power_on) {
5520 err = gk20a_busy(g);
5521 if (err)
5522 return err;
5523
5524 gk20a_pmu_get_pg_stats(g,
5525 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
5526 gk20a_idle(g);
5527 }
5528 total_ingating = g->pg_ingating_time_us +
5529 (u64)pg_stat_data.ingating_time;
5530 total_ungating = g->pg_ungating_time_us +
5531 (u64)pg_stat_data.ungating_time;
5532
5533 divisor = total_ingating + total_ungating;
5534
5535 /* We compute the residency on a scale of 1000 */
5536 dividend = total_ingating * 1000;
5537
5538 if (divisor)
5539 residency = div64_u64(dividend, divisor);
5540 else
5541 residency = 0;
5542
5543 seq_printf(s,
5544 "Time in MSCG: %llu us\n"
5545 "Time out of MSCG: %llu us\n"
5546 "MSCG residency ratio: %llu\n"
5547 "MSCG Entry Count: %u\n"
5548 "MSCG Avg Entry latency %u\n"
5549 "MSCG Avg Exit latency %u\n",
5550 total_ingating, total_ungating,
5551 residency, pg_stat_data.gating_cnt,
5552 pg_stat_data.avg_entry_latency_us,
5553 pg_stat_data.avg_exit_latency_us);
5554 return 0;
5555
5556}
5557
5558static int mscg_stat_open(struct inode *inode, struct file *file)
5559{
5560 return single_open(file, mscg_stat_show, inode->i_private);
5561}
5562
5563static const struct file_operations mscg_stat_fops = {
5564 .open = mscg_stat_open,
5565 .read = seq_read,
5566 .llseek = seq_lseek,
5567 .release = single_release,
5568};
5569
5570static int mscg_transitions_show(struct seq_file *s, void *data)
5571{
5572 struct gk20a *g = s->private;
5573 struct pmu_pg_stats_data pg_stat_data = { 0 };
5574 u32 total_gating_cnt;
5575 int err;
5576
5577 if (g->power_on) {
5578 err = gk20a_busy(g);
5579 if (err)
5580 return err;
5581
5582 gk20a_pmu_get_pg_stats(g,
5583 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
5584 gk20a_idle(g);
5585 }
5586 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
5587
5588 seq_printf(s, "%u\n", total_gating_cnt);
5589 return 0;
5590
5591}
5592
5593static int mscg_transitions_open(struct inode *inode, struct file *file)
5594{
5595 return single_open(file, mscg_transitions_show, inode->i_private);
5596}
5597
5598static const struct file_operations mscg_transitions_fops = {
5599 .open = mscg_transitions_open,
5600 .read = seq_read,
5601 .llseek = seq_lseek,
5602 .release = single_release,
5603};
5604
5605static int elpg_stat_show(struct seq_file *s, void *data)
5606{
5607 struct gk20a *g = s->private;
5608 struct pmu_pg_stats_data pg_stat_data = { 0 };
5609 u64 total_ingating, total_ungating, residency, divisor, dividend;
5610 int err;
5611
5612 /* Don't unnecessarily power on the device */
5613 if (g->power_on) {
5614 err = gk20a_busy(g);
5615 if (err)
5616 return err;
5617
5618 gk20a_pmu_get_pg_stats(g,
5619 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
5620 gk20a_idle(g);
5621 }
5622 total_ingating = g->pg_ingating_time_us +
5623 (u64)pg_stat_data.ingating_time;
5624 total_ungating = g->pg_ungating_time_us +
5625 (u64)pg_stat_data.ungating_time;
5626 divisor = total_ingating + total_ungating;
5627
5628 /* We compute the residency on a scale of 1000 */
5629 dividend = total_ingating * 1000;
5630
5631 if (divisor)
5632 residency = div64_u64(dividend, divisor);
5633 else
5634 residency = 0;
5635
5636 seq_printf(s,
5637 "Time in ELPG: %llu us\n"
5638 "Time out of ELPG: %llu us\n"
5639 "ELPG residency ratio: %llu\n"
5640 "ELPG Entry Count: %u\n"
5641 "ELPG Avg Entry latency %u us\n"
5642 "ELPG Avg Exit latency %u us\n",
5643 total_ingating, total_ungating,
5644 residency, pg_stat_data.gating_cnt,
5645 pg_stat_data.avg_entry_latency_us,
5646 pg_stat_data.avg_exit_latency_us);
5647 return 0;
5648
5649}
5650
5651static int elpg_stat_open(struct inode *inode, struct file *file)
5652{
5653 return single_open(file, elpg_stat_show, inode->i_private);
5654}
5655
5656static const struct file_operations elpg_stat_fops = {
5657 .open = elpg_stat_open,
5658 .read = seq_read,
5659 .llseek = seq_lseek,
5660 .release = single_release,
5661};
5662
5663static int elpg_transitions_show(struct seq_file *s, void *data)
5664{
5665 struct gk20a *g = s->private;
5666 struct pmu_pg_stats_data pg_stat_data = { 0 };
5667 u32 total_gating_cnt;
5668 int err;
5669
5670 if (g->power_on) {
5671 err = gk20a_busy(g);
5672 if (err)
5673 return err;
5674
5675 gk20a_pmu_get_pg_stats(g,
5676 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
5677 gk20a_idle(g);
5678 }
5679 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
5680
5681 seq_printf(s, "%u\n", total_gating_cnt);
5682 return 0;
5683
5684}
5685
5686static int elpg_transitions_open(struct inode *inode, struct file *file)
5687{
5688 return single_open(file, elpg_transitions_show, inode->i_private);
5689}
5690
5691static const struct file_operations elpg_transitions_fops = {
5692 .open = elpg_transitions_open,
5693 .read = seq_read,
5694 .llseek = seq_lseek,
5695 .release = single_release,
5696};
5697
5698static int falc_trace_show(struct seq_file *s, void *data)
5699{
5700 struct gk20a *g = s->private;
5701 struct pmu_gk20a *pmu = &g->pmu;
5702 u32 i = 0, j = 0, k, l, m;
5703 char part_str[40];
5704 void *tracebuffer;
5705 char *trace;
5706 u32 *trace1;
5707
5708 /* allocate system memory to copy pmu trace buffer */
5709 tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
5710 if (tracebuffer == NULL)
5711 return -ENOMEM;
5712
5713 /* read pmu traces into system memory buffer */
5714 nvgpu_mem_rd_n(g, &pmu->trace_buf,
5715 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
5716
5717 trace = (char *)tracebuffer;
5718 trace1 = (u32 *)tracebuffer;
5719
5720 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
5721 for (j = 0; j < 0x40; j++)
5722 if (trace1[(i / 4) + j])
5723 break;
5724 if (j == 0x40)
5725 break;
5726 seq_printf(s, "Index %x: ", trace1[(i / 4)]);
5727 l = 0;
5728 m = 0;
5729 while (find_hex_in_string((trace+i+20+m), g, &k)) {
5730 if (k >= 40)
5731 break;
5732 strncpy(part_str, (trace+i+20+m), k);
5733 part_str[k] = 0;
5734 seq_printf(s, "%s0x%x", part_str,
5735 trace1[(i / 4) + 1 + l]);
5736 l++;
5737 m += k + 2;
5738 }
5739 seq_printf(s, "%s", (trace+i+20+m));
5740 }
5741
5742 nvgpu_kfree(g, tracebuffer);
5743 return 0;
5744}
5745
5746static int falc_trace_open(struct inode *inode, struct file *file)
5747{
5748 return single_open(file, falc_trace_show, inode->i_private);
5749}
5750
5751static const struct file_operations falc_trace_fops = {
5752 .open = falc_trace_open,
5753 .read = seq_read,
5754 .llseek = seq_lseek,
5755 .release = single_release,
5756};
5757
5758static int perfmon_events_enable_show(struct seq_file *s, void *data)
5759{
5760 struct gk20a *g = s->private;
5761
5762 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
5763 return 0;
5764
5765}
5766
5767static int perfmon_events_enable_open(struct inode *inode, struct file *file)
5768{
5769 return single_open(file, perfmon_events_enable_show, inode->i_private);
5770}
5771
5772static ssize_t perfmon_events_enable_write(struct file *file,
5773 const char __user *userbuf, size_t count, loff_t *ppos)
5774{
5775 struct seq_file *s = file->private_data;
5776 struct gk20a *g = s->private;
5777 unsigned long val = 0;
5778 char buf[40];
5779 int buf_size;
5780 int err;
5781
5782 memset(buf, 0, sizeof(buf));
5783 buf_size = min(count, (sizeof(buf)-1));
5784
5785 if (copy_from_user(buf, userbuf, buf_size))
5786 return -EFAULT;
5787
5788 if (kstrtoul(buf, 10, &val) < 0)
5789 return -EINVAL;
5790
5791 /* Don't turn on gk20a unnecessarily */
5792 if (g->power_on) {
5793 err = gk20a_busy(g);
5794 if (err)
5795 return err;
5796
5797 if (val && !g->pmu.perfmon_sampling_enabled) {
5798 g->pmu.perfmon_sampling_enabled = true;
5799 pmu_perfmon_start_sampling(&(g->pmu));
5800 } else if (!val && g->pmu.perfmon_sampling_enabled) {
5801 g->pmu.perfmon_sampling_enabled = false;
5802 pmu_perfmon_stop_sampling(&(g->pmu));
5803 }
5804 gk20a_idle(g);
5805 } else {
5806 g->pmu.perfmon_sampling_enabled = val ? true : false;
5807 }
5808
5809 return count;
5810}
5811
5812static const struct file_operations perfmon_events_enable_fops = {
5813 .open = perfmon_events_enable_open,
5814 .read = seq_read,
5815 .write = perfmon_events_enable_write,
5816 .llseek = seq_lseek,
5817 .release = single_release,
5818};
5819
5820static int perfmon_events_count_show(struct seq_file *s, void *data)
5821{
5822 struct gk20a *g = s->private;
5823
5824 seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
5825 return 0;
5826
5827}
5828
5829static int perfmon_events_count_open(struct inode *inode, struct file *file)
5830{
5831 return single_open(file, perfmon_events_count_show, inode->i_private);
5832}
5833
5834static const struct file_operations perfmon_events_count_fops = {
5835 .open = perfmon_events_count_open,
5836 .read = seq_read,
5837 .llseek = seq_lseek,
5838 .release = single_release,
5839};
5840
5841static int security_show(struct seq_file *s, void *data)
5842{
5843 struct gk20a *g = s->private;
5844
5845 seq_printf(s, "%d\n", g->pmu.pmu_mode);
5846 return 0;
5847
5848}
5849
5850static int security_open(struct inode *inode, struct file *file)
5851{
5852 return single_open(file, security_show, inode->i_private);
5853}
5854
5855static const struct file_operations security_fops = {
5856 .open = security_open,
5857 .read = seq_read,
5858 .llseek = seq_lseek,
5859 .release = single_release,
5860};
5861
5862int gk20a_pmu_debugfs_init(struct device *dev)
5863{
5864 struct dentry *d;
5865 struct gk20a_platform *platform = dev_get_drvdata(dev);
5866 struct gk20a *g = get_gk20a(dev);
5867
5868 d = debugfs_create_file(
5869 "lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
5870 &lpwr_debug_fops);
5871 if (!d)
5872 goto err_out;
5873
5874 d = debugfs_create_file(
5875 "mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
5876 &mscg_stat_fops);
5877 if (!d)
5878 goto err_out;
5879
5880 d = debugfs_create_file(
5881 "mscg_transitions", S_IRUGO, platform->debugfs, g,
5882 &mscg_transitions_fops);
5883 if (!d)
5884 goto err_out;
5885
5886 d = debugfs_create_file(
5887 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
5888 &elpg_stat_fops);
5889 if (!d)
5890 goto err_out;
5891
5892 d = debugfs_create_file(
5893 "elpg_transitions", S_IRUGO, platform->debugfs, g,
5894 &elpg_transitions_fops);
5895 if (!d)
5896 goto err_out;
5897
5898 d = debugfs_create_file(
5899 "falc_trace", S_IRUGO, platform->debugfs, g,
5900 &falc_trace_fops);
5901 if (!d)
5902 goto err_out;
5903
5904 d = debugfs_create_file(
5905 "perfmon_events_enable", S_IRUGO, platform->debugfs, g,
5906 &perfmon_events_enable_fops);
5907 if (!d)
5908 goto err_out;
5909
5910 d = debugfs_create_file(
5911 "perfmon_events_count", S_IRUGO, platform->debugfs, g,
5912 &perfmon_events_count_fops);
5913 if (!d)
5914 goto err_out;
5915
5916 d = debugfs_create_file(
5917 "pmu_security", S_IRUGO, platform->debugfs, g,
5918 &security_fops);
5919 if (!d)
5920 goto err_out;
5921 return 0;
5922err_out:
5923 pr_err("%s: Failed to make debugfs node\n", __func__);
5924 debugfs_remove_recursive(platform->debugfs);
5925 return -ENOMEM;
5926}
5927
5928#endif
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e7a8b7c2..cefb6577 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -426,7 +426,6 @@ int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token);
426int gk20a_pmu_destroy(struct gk20a *g); 426int gk20a_pmu_destroy(struct gk20a *g);
427int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); 427int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
428int gk20a_pmu_load_update(struct gk20a *g); 428int gk20a_pmu_load_update(struct gk20a *g);
429int gk20a_pmu_debugfs_init(struct device *dev);
430void gk20a_pmu_reset_load_counters(struct gk20a *g); 429void gk20a_pmu_reset_load_counters(struct gk20a *g);
431void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, 430void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
432 u32 *total_cycles); 431 u32 *total_cycles);
@@ -468,5 +467,11 @@ int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
468 u32 size); 467 u32 size);
469int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem, 468int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
470 u32 size); 469 u32 size);
470int gk20a_pmu_get_pg_stats(struct gk20a *g,
471 u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data);
472bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos);
473
474int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu);
475int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu);
471 476
472#endif /*__PMU_GK20A_H__*/ 477#endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
index b7edf3f0..3f3119af 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -13,10 +13,6 @@
13 13
14#include <asm/barrier.h> 14#include <asm/barrier.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#ifdef CONFIG_DEBUG_FS
17#include <linux/debugfs.h>
18#include "platform_gk20a.h"
19#endif
20#include <linux/uaccess.h> 16#include <linux/uaccess.h>
21#include <linux/poll.h> 17#include <linux/poll.h>
22#include <uapi/linux/nvgpu.h> 18#include <uapi/linux/nvgpu.h>
@@ -523,69 +519,6 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
523 return 0; 519 return 0;
524} 520}
525 521
526#ifdef CONFIG_DEBUG_FS
527static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
528{
529 struct device *dev = s->private;
530 struct gk20a *g = gk20a_get_platform(dev)->g;
531 struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
532 bool sched_busy = true;
533
534 int n = sched->bitmap_size / sizeof(u64);
535 int i;
536 int err;
537
538 err = gk20a_busy(g);
539 if (err)
540 return err;
541
542 if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
543 sched_busy = false;
544 nvgpu_mutex_release(&sched->busy_lock);
545 }
546
547 seq_printf(s, "control_locked=%d\n", sched->control_locked);
548 seq_printf(s, "busy=%d\n", sched_busy);
549 seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
550
551 nvgpu_mutex_acquire(&sched->status_lock);
552
553 seq_puts(s, "active_tsg_bitmap\n");
554 for (i = 0; i < n; i++)
555 seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
556
557 seq_puts(s, "recent_tsg_bitmap\n");
558 for (i = 0; i < n; i++)
559 seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
560
561 nvgpu_mutex_release(&sched->status_lock);
562
563 gk20a_idle(g);
564
565 return 0;
566}
567
568static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
569{
570 return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
571}
572
573static const struct file_operations gk20a_sched_debugfs_fops = {
574 .open = gk20a_sched_debugfs_open,
575 .read = seq_read,
576 .llseek = seq_lseek,
577 .release = single_release,
578};
579
580void gk20a_sched_debugfs_init(struct device *dev)
581{
582 struct gk20a_platform *platform = dev_get_drvdata(dev);
583
584 debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
585 dev, &gk20a_sched_debugfs_fops);
586}
587#endif /* CONFIG_DEBUG_FS */
588
589void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) 522void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
590{ 523{
591 struct gk20a_sched_ctrl *sched = &g->sched_ctrl; 524 struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
index 4f6d1510..776f689d 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
@@ -48,7 +48,6 @@ void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
48void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); 48void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
49int gk20a_sched_ctrl_init(struct gk20a *); 49int gk20a_sched_ctrl_init(struct gk20a *);
50 50
51void gk20a_sched_debugfs_init(struct device *dev);
52void gk20a_sched_ctrl_cleanup(struct gk20a *g); 51void gk20a_sched_ctrl_cleanup(struct gk20a *g);
53 52
54#endif /* __SCHED_GK20A_H */ 53#endif /* __SCHED_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 82c587f9..c6e451e1 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -20,6 +20,7 @@
20#include <nvgpu/kmem.h> 20#include <nvgpu/kmem.h>
21#include <nvgpu/log.h> 21#include <nvgpu/log.h>
22#include <nvgpu/enabled.h> 22#include <nvgpu/enabled.h>
23#include <nvgpu/debug.h>
23 24
24#include "gk20a/gk20a.h" 25#include "gk20a/gk20a.h"
25#include "gk20a/gr_gk20a.h" 26#include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index f5328f03..831fd5da 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -33,11 +33,11 @@
33#include "clk_gm20b.h" 33#include "clk_gm20b.h"
34#include "mc_gm20b.h" 34#include "mc_gm20b.h"
35#include "regops_gm20b.h" 35#include "regops_gm20b.h"
36#include "debug_gm20b.h"
37#include "cde_gm20b.h" 36#include "cde_gm20b.h"
38#include "therm_gm20b.h" 37#include "therm_gm20b.h"
39#include "hal_gm20b.h" 38#include "hal_gm20b.h"
40 39
40#include <nvgpu/debug.h>
41#include <nvgpu/bug.h> 41#include <nvgpu/bug.h>
42#include <nvgpu/enabled.h> 42#include <nvgpu/enabled.h>
43 43
@@ -234,7 +234,7 @@ int gm20b_init_hal(struct gk20a *g)
234 gm20b_init_pmu_ops(gops); 234 gm20b_init_pmu_ops(gops);
235 gm20b_init_clk_ops(gops); 235 gm20b_init_clk_ops(gops);
236 gm20b_init_regops(gops); 236 gm20b_init_regops(gops);
237 gm20b_init_debug_ops(gops); 237 gk20a_init_debug_ops(gops);
238 gk20a_init_dbg_session_ops(gops); 238 gk20a_init_dbg_session_ops(gops);
239 gm20b_init_cde_ops(gops); 239 gm20b_init_cde_ops(gops);
240 gm20b_init_therm_ops(gops); 240 gm20b_init_therm_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index f28ff45f..d923e5e9 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -53,6 +53,7 @@
53 53
54#include "hal_gp106.h" 54#include "hal_gp106.h"
55 55
56#include <nvgpu/debug.h>
56#include <nvgpu/bug.h> 57#include <nvgpu/bug.h>
57 58
58#include <nvgpu/hw/gp106/hw_proj_gp106.h> 59#include <nvgpu/hw/gp106/hw_proj_gp106.h>
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 98a8be2f..9a30ad7c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -23,6 +23,7 @@
23#include <nvgpu/gmmu.h> 23#include <nvgpu/gmmu.h>
24#include <nvgpu/dma.h> 24#include <nvgpu/dma.h>
25#include <nvgpu/bug.h> 25#include <nvgpu/bug.h>
26#include <nvgpu/debug.h>
26 27
27#include "gk20a/gk20a.h" 28#include "gk20a/gk20a.h"
28#include "gk20a/gr_gk20a.h" 29#include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e2a931be..a1906a08 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -44,6 +44,7 @@
44#include "gp10b.h" 44#include "gp10b.h"
45#include "hal_gp10b.h" 45#include "hal_gp10b.h"
46 46
47#include <nvgpu/debug.h>
47#include <nvgpu/bug.h> 48#include <nvgpu/bug.h>
48#include <nvgpu/enabled.h> 49#include <nvgpu/enabled.h>
49 50
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index 3579b0fb..567c4422 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -256,11 +256,13 @@ static inline struct gk20a *nvgpu_alloc_to_gpu(struct nvgpu_allocator *a)
256 return a->g; 256 return a->g;
257} 257}
258 258
259#ifdef CONFIG_DEBUG_FS
259/* 260/*
260 * Common functionality for the internals of the allocators. 261 * Common functionality for the internals of the allocators.
261 */ 262 */
262void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a); 263void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a);
263void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a); 264void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a);
265#endif
264 266
265int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g, 267int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
266 const char *name, void *priv, bool dbg, 268 const char *name, void *priv, bool dbg,
@@ -281,11 +283,6 @@ static inline void nvgpu_alloc_disable_dbg(struct nvgpu_allocator *a)
281 */ 283 */
282extern u32 nvgpu_alloc_tracing_on; 284extern u32 nvgpu_alloc_tracing_on;
283 285
284#ifdef CONFIG_DEBUG_FS
285struct device;
286void nvgpu_alloc_debugfs_init(struct device *dev);
287#endif
288
289#define nvgpu_alloc_trace_func() \ 286#define nvgpu_alloc_trace_func() \
290 do { \ 287 do { \
291 if (nvgpu_alloc_tracing_on) \ 288 if (nvgpu_alloc_tracing_on) \
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/debug.h
index 213922b3..70a03978 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/debug.h
@@ -14,28 +14,42 @@
14 * 14 *
15 */ 15 */
16 16
17#ifndef _DEBUG_GK20A_H_ 17#ifndef __NVGPU_DEBUG_H__
18#define _DEBUG_GK20A_H_ 18#define __NVGPU_DEBUG_H__
19 19
20struct platform_device;
21struct gk20a; 20struct gk20a;
22struct gpu_ops; 21struct gpu_ops;
23 22
24extern unsigned int gk20a_debug_trace_cmdbuf;
25
26struct gk20a_debug_output { 23struct gk20a_debug_output {
27 void (*fn)(void *ctx, const char *str, size_t len); 24 void (*fn)(void *ctx, const char *str, size_t len);
28 void *ctx; 25 void *ctx;
29 char buf[256]; 26 char buf[256];
30}; 27};
31 28
29#ifdef CONFIG_DEBUG_FS
30extern unsigned int gk20a_debug_trace_cmdbuf;
31
32void gk20a_debug_output(struct gk20a_debug_output *o, 32void gk20a_debug_output(struct gk20a_debug_output *o,
33 const char *fmt, ...); 33 const char *fmt, ...);
34 34
35void gk20a_debug_dump(struct device *pdev); 35void gk20a_debug_dump(struct gk20a *g);
36void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o); 36void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o);
37int gk20a_gr_debug_dump(struct device *pdev); 37int gk20a_gr_debug_dump(struct gk20a *g);
38void gk20a_debug_init(struct device *dev, const char *debugfs_symlink);
39void gk20a_init_debug_ops(struct gpu_ops *gops); 38void gk20a_init_debug_ops(struct gpu_ops *gops);
40void gk20a_debug_dump_device(void *dev); 39
40void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink);
41void gk20a_debug_deinit(struct gk20a *g);
42#else
43static inline void gk20a_debug_output(struct gk20a_debug_output *o,
44 const char *fmt, ...) {}
45
46static inline void gk20a_debug_dump(struct gk20a *g) {}
47static inline void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) {}
48static inline int gk20a_gr_debug_dump(struct gk20a *g) { return 0;}
49static inline void gk20a_init_debug_ops(struct gpu_ops *gops) {}
50
51static inline void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) {}
52static inline void gk20a_debug_deinit(struct gk20a *g) {}
41#endif 53#endif
54
55#endif /* __NVGPU_DEBUG_H__ */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
index dc198a04..611854f2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
@@ -31,12 +31,6 @@ void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
31 unsigned long ip); 31 unsigned long ip);
32void __nvgpu_track_vfree(struct gk20a *g, void *addr); 32void __nvgpu_track_vfree(struct gk20a *g, void *addr);
33void __nvgpu_track_kfree(struct gk20a *g, void *addr); 33void __nvgpu_track_kfree(struct gk20a *g, void *addr);
34
35void nvgpu_kmem_debugfs_init(struct device *dev);
36#else
37static inline void nvgpu_kmem_debugfs_init(struct device *dev)
38{
39}
40#endif 34#endif
41 35
42/** 36/**
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 02cc5b47..cdd0d378 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -22,10 +22,10 @@
22#include <nvgpu/kmem.h> 22#include <nvgpu/kmem.h>
23#include <nvgpu/bug.h> 23#include <nvgpu/bug.h>
24#include <nvgpu/enabled.h> 24#include <nvgpu/enabled.h>
25#include <nvgpu/debug.h>
25 26
26#include "vgpu/vgpu.h" 27#include "vgpu/vgpu.h"
27#include "vgpu/fecs_trace_vgpu.h" 28#include "vgpu/fecs_trace_vgpu.h"
28#include "gk20a/debug_gk20a.h"
29#include "gk20a/hal_gk20a.h" 29#include "gk20a/hal_gk20a.h"
30#include "gk20a/ctxsw_trace_gk20a.h" 30#include "gk20a/ctxsw_trace_gk20a.h"
31#include "gk20a/tsg_gk20a.h" 31#include "gk20a/tsg_gk20a.h"
@@ -667,7 +667,7 @@ int vgpu_probe(struct platform_device *pdev)
667 if (err) 667 if (err)
668 return err; 668 return err;
669 669
670 gk20a_debug_init(dev, "gpu.0"); 670 gk20a_debug_init(gk20a, "gpu.0");
671 671
672 /* Set DMA parameters to allow larger sgt lists */ 672 /* Set DMA parameters to allow larger sgt lists */
673 dev->dma_parms = &gk20a->dma_parms; 673 dev->dma_parms = &gk20a->dma_parms;