summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/linux/debug_fifo.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2017-05-24 08:07:04 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-06-02 09:53:35 -0400
commit6090a8a7ee347f92d806f104d3a0082208f5df64 (patch)
tree74b0d7057ea1b112d7de41f1bbce5e212f1525de /drivers/gpu/nvgpu/common/linux/debug_fifo.c
parentbe7f22db8bc5bff131432a4f6d127ecc8ce5096d (diff)
gpu: nvgpu: move debugfs code to linux module
Since all debugfs code is Linux specific, remove it from common code and move it to Linux module Debugfs code is now divided into below module specific files : common/linux/debug.c common/linux/debug_cde.c common/linux/debug_ce.c common/linux/debug_fifo.c common/linux/debug_gr.c common/linux/debug_mm.c common/linux/debug_allocator.c common/linux/debug_kmem.c common/linux/debug_pmu.c common/linux/debug_sched.c Add corresponding header files for above modules too And compile all of above files only if CONFIG_DEBUG_FS is set Some more details of the changes made - Move and rename gk20a/debug_gk20a.c to common/linux/debug.c - Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h - Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call gk20a_init_debug_ops() directly from gm20b_init_hal() - Update all debug APIs to receive struct gk20a as parameter instead of receiving struct device pointer - Update API gk20a_dmabuf_get_state() to receive struct gk20a pointer instead of struct device - Include <nvgpu/debug.h> explicitly in all files where debug operations are used - Remove "gk20a/platform_gk20a.h" include from HAL files which no longer need this include - Add new API gk20a_debug_deinit() to deinitialize debugfs and call it from gk20a_remove() - Move API gk20a_debug_dump_all_channel_status_ramfc() to gk20a/fifo_gk20a.c Jira NVGPU-62 Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/1488902 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/debug_fifo.c')
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.c369
1 files changed, 369 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22
23void __gk20a_fifo_profile_free(struct kref *ref);
24
25static void *gk20a_fifo_sched_debugfs_seq_start(
26 struct seq_file *s, loff_t *pos)
27{
28 struct gk20a *g = s->private;
29 struct fifo_gk20a *f = &g->fifo;
30
31 if (*pos >= f->num_channels)
32 return NULL;
33
34 return &f->channel[*pos];
35}
36
37static void *gk20a_fifo_sched_debugfs_seq_next(
38 struct seq_file *s, void *v, loff_t *pos)
39{
40 struct gk20a *g = s->private;
41 struct fifo_gk20a *f = &g->fifo;
42
43 ++(*pos);
44 if (*pos >= f->num_channels)
45 return NULL;
46
47 return &f->channel[*pos];
48}
49
/* seq_file stop: nothing to release — start/next take no locks or refs. */
static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
}
54
/*
 * seq_file show: print one scheduling-table row for channel @v.
 *
 * The column header is emitted only when @v is the first entry of the
 * fifo's channel array; for that entry ret is set to 0 so the buffered
 * header is kept even if the channel itself is skipped.  All other
 * inactive channels return SEQ_SKIP, which discards their (empty)
 * buffered output.  TSG-bound channels report the TSG's timeslice and
 * interleave level instead of the per-channel values.
 */
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	/* resolve the runlist serving the graphics engine; its active
	 * bitmap decides which channels are worth printing */
	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->hw_chid, runlist->active_channels))
		return ret;

	/* take a channel ref so the channel cannot go away mid-print */
	if (gk20a_channel_get(ch)) {
		if (gk20a_is_channel_marked_as_tsg(ch))
			tsg = &f->tsg[ch->tsgid];

		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->hw_chid,
				ch->tsgid,
				ch->tgid,
				tsg ? tsg->timeslice_us : ch->timeslice_us,
				ch->timeout_ms_max,
				tsg ? tsg->interleave_level : ch->interleave_level,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
		gk20a_channel_put(ch);
	}
	return 0;
}
100
/* Iterator over the fifo's channel table for the "sched" debugfs dump. */
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};
107
108static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
109 struct file *file)
110{
111 int err;
112
113 if (!capable(CAP_SYS_ADMIN))
114 return -EPERM;
115
116 err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
117 if (err)
118 return err;
119
120 gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
121
122 ((struct seq_file *)file->private_data)->private = inode->i_private;
123 return 0;
124};
125
/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops.  Read/seek/release are the stock seq_file
 * helpers; only open is custom (capability check + private wiring).
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
137
/*
 * debugfs "enable" write handler: switch fifo kickoff profiling on/off.
 *
 * @data: the struct gk20a instance (stored as the attribute's private).
 * @val:  0 disables profiling; any non-zero value enables it.
 *
 * The profiling buffers are refcounted rather than freed inline so that
 * an in-flight kickoff still holding a reference (gk20a_fifo_profile_acquire)
 * keeps them alive after a disable.  Returns 0, or -ENOMEM if the
 * buffers cannot be allocated on enable.
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;


	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			/* drop the "enabled" reference; the buffers are
			 * released once the last user's kref is gone */
			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* not kref init as it can have a running condition if
			 * we enable/disable/enable while kickoff is happening
			 */
			if (!kref_get_unless_zero(&f->profile.ref)) {
				/* ref was zero: buffers are gone (or never
				 * existed) — allocate fresh ones */
				f->profile.data = vzalloc(
					FIFO_PROFILING_ENTRIES *
					sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = vzalloc(
					FIFO_PROFILING_ENTRIES *
					sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				kref_init(&f->profile.ref);
			}
			/* restart the ring buffer from slot 0 */
			atomic_set(&f->profile.get, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}
178
/* "enable" node: write-only simple attribute (no get op), value parsed
 * as u64 and routed to gk20a_fifo_profile_enable(). */
DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);
185
/*
 * sort() comparison callback: orders u64 timestamp deltas ascending.
 *
 * Fix: the previous implementation returned the 64-bit difference
 * truncated to int, which reports the wrong (or zero) sign whenever the
 * difference does not fit in 31 bits — e.g. a delta of 2^32 truncated
 * to 0, corrupting the sort order.  Compare explicitly instead.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *) a;
	unsigned long long y = *(const unsigned long long *) b;

	return (x > y) - (x < y);
}
190
191/*
192 * This uses about 800b in the stack, but the function using it is not part
193 * of a callstack where much memory is being used, so it is fine
194 */
195#define PERCENTILE_WIDTH 5
196#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
197
198static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
199 u64 *percentiles, u32 index_end, u32 index_start)
200{
201 unsigned int nelem = 0;
202 unsigned int index;
203 struct fifo_profile_gk20a *profile;
204
205 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
206 profile = &g->fifo.profile.data[index];
207
208 if (profile->timestamp[index_end] >
209 profile->timestamp[index_start]) {
210 /* This is a valid element */
211 g->fifo.profile.sorted[nelem] =
212 profile->timestamp[index_end] -
213 profile->timestamp[index_start];
214 nelem++;
215 }
216 }
217
218 /* sort it */
219 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
220 __profile_cmp, NULL);
221
222 /* build ranges */
223 for (index = 0; index < PERCENTILE_RANGES; index++)
224 percentiles[index] =
225 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
226 nelem)/100 - 1];
227 return nelem;
228}
229
230static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
231{
232 struct gk20a *g = s->private;
233 unsigned int get, nelem, index;
234 /*
235 * 800B in the stack, but function is declared statically and only
236 * called from debugfs handler
237 */
238 u64 percentiles_ioctl[PERCENTILE_RANGES];
239 u64 percentiles_kickoff[PERCENTILE_RANGES];
240 u64 percentiles_jobtracking[PERCENTILE_RANGES];
241 u64 percentiles_append[PERCENTILE_RANGES];
242 u64 percentiles_userd[PERCENTILE_RANGES];
243
244 if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
245 seq_printf(s, "Profiling disabled\n");
246 return 0;
247 }
248
249 get = atomic_read(&g->fifo.profile.get);
250
251 __gk20a_fifo_create_stats(g, percentiles_ioctl,
252 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
253 __gk20a_fifo_create_stats(g, percentiles_kickoff,
254 PROFILE_END, PROFILE_ENTRY);
255 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
256 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
257 __gk20a_fifo_create_stats(g, percentiles_append,
258 PROFILE_APPEND, PROFILE_JOB_TRACKING);
259 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
260 PROFILE_END, PROFILE_APPEND);
261
262 seq_printf(s, "Number of kickoffs: %d\n", nelem);
263 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
264
265 for (index = 0; index < PERCENTILE_RANGES; index++)
266 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
267 PERCENTILE_WIDTH * (index+1),
268 percentiles_ioctl[index],
269 percentiles_kickoff[index],
270 percentiles_append[index],
271 percentiles_jobtracking[index],
272 percentiles_userd[index]);
273
274 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
275
276 return 0;
277}
278
/* Open handler for the "stats" node: single-shot seq_file wired to the
 * gk20a instance stored in i_private. */
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}
283
/* "stats" node file ops: custom open plus the stock single_open
 * seq_file helpers for read/seek/release. */
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
290
291
292void gk20a_fifo_debugfs_init(struct gk20a *g)
293{
294 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
295
296 struct dentry *gpu_root = platform->debugfs;
297 struct dentry *fifo_root;
298 struct dentry *profile_root;
299
300 fifo_root = debugfs_create_dir("fifo", gpu_root);
301 if (IS_ERR_OR_NULL(fifo_root))
302 return;
303
304 gk20a_dbg(gpu_dbg_info, "g=%p", g);
305
306 debugfs_create_file("sched", 0600, fifo_root, g,
307 &gk20a_fifo_sched_debugfs_fops);
308
309 profile_root = debugfs_create_dir("profile", fifo_root);
310 if (IS_ERR_OR_NULL(profile_root))
311 return;
312
313 nvgpu_mutex_init(&g->fifo.profile.lock);
314 g->fifo.profile.enabled = false;
315 atomic_set(&g->fifo.profile.get, 0);
316 atomic_set(&g->fifo.profile.ref.refcount, 0);
317
318 debugfs_create_file("enable", 0600, profile_root, g,
319 &gk20a_fifo_profile_enable_debugfs_fops);
320
321 debugfs_create_file("stats", 0600, profile_root, g,
322 &gk20a_fifo_profile_stats_debugfs_fops);
323
324}
325
326void __gk20a_fifo_profile_free(struct kref *ref)
327{
328 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
329 profile.ref);
330 nvgpu_vfree(f->g, f->profile.data);
331 nvgpu_vfree(f->g, f->profile.sorted);
332}
333
334/* Get the next element in the ring buffer of profile entries
335 * and grab a reference to the structure
336 */
337struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
338{
339 struct fifo_gk20a *f = &g->fifo;
340 struct fifo_profile_gk20a *profile;
341 unsigned int index;
342
343 /* If kref is zero, profiling is not enabled */
344 if (!kref_get_unless_zero(&f->profile.ref))
345 return NULL;
346 index = atomic_inc_return(&f->profile.get);
347 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
348
349 return profile;
350}
351
/* Free the reference to the structure. This allows deferred cleanups */
/* Note: @profile is unused — the kref lives on the fifo, not the entry. */
void gk20a_fifo_profile_release(struct gk20a *g,
		struct fifo_profile_gk20a *profile)
{
	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}
358
359void gk20a_fifo_debugfs_deinit(struct gk20a *g)
360{
361 struct fifo_gk20a *f = &g->fifo;
362
363 nvgpu_mutex_acquire(&f->profile.lock);
364 if (f->profile.enabled) {
365 f->profile.enabled = false;
366 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
367 }
368 nvgpu_mutex_release(&f->profile.lock);
369}