diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 792 |
1 files changed, 792 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c new file mode 100644 index 00000000..d283a82e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -0,0 +1,792 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <asm/barrier.h> | ||
24 | #ifdef CONFIG_DEBUG_FS | ||
25 | #include <linux/debugfs.h> | ||
26 | #endif | ||
27 | #include <uapi/linux/nvgpu.h> | ||
28 | |||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/dma.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/hashtable.h> | ||
33 | #include <nvgpu/circ_buf.h> | ||
34 | #include <nvgpu/thread.h> | ||
35 | #include <nvgpu/barrier.h> | ||
36 | #include <nvgpu/mm.h> | ||
37 | #include <nvgpu/enabled.h> | ||
38 | #include <nvgpu/ctxsw_trace.h> | ||
39 | |||
40 | #include "fecs_trace_gk20a.h" | ||
41 | #include "gk20a.h" | ||
42 | #include "gr_gk20a.h" | ||
43 | #include "common/linux/os_linux.h" | ||
44 | |||
45 | #include <nvgpu/log.h> | ||
46 | |||
47 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
48 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
49 | |||
/*
 * Number of records in the FECS trace ring.
 *
 * If HW circular buffer is getting too many "buffer full" conditions,
 * increasing this constant should help (it drives Linux' internal buffer
 * size). The value must remain a power of two: the poll loop wraps the read
 * index with a bit mask, and init asserts this with BUG_ON().
 */
#define GK20A_FECS_TRACE_NUM_RECORDS		(1 << 6)

/* Bucket-count exponent of the context_ptr -> pid hash table (2^8). */
#define GK20A_FECS_TRACE_HASH_BITS		8

/* Lower bound of the polling thread's sleep, in microseconds (1/60 s). */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US	(1000000ULL/60ULL)

/* Record timestamps are shifted left by this many bits before being exported. */
#define GK20A_FECS_TRACE_PTIMER_SHIFT		5
58 | |||
59 | struct gk20a_fecs_trace_record { | ||
60 | u32 magic_lo; | ||
61 | u32 magic_hi; | ||
62 | u32 context_id; | ||
63 | u32 context_ptr; | ||
64 | u32 new_context_id; | ||
65 | u32 new_context_ptr; | ||
66 | u64 ts[]; | ||
67 | }; | ||
68 | |||
69 | struct gk20a_fecs_trace_hash_ent { | ||
70 | u32 context_ptr; | ||
71 | pid_t pid; | ||
72 | struct hlist_node node; | ||
73 | }; | ||
74 | |||
75 | struct gk20a_fecs_trace { | ||
76 | |||
77 | struct nvgpu_mem trace_buf; | ||
78 | DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS); | ||
79 | struct nvgpu_mutex hash_lock; | ||
80 | struct nvgpu_mutex poll_lock; | ||
81 | struct nvgpu_thread poll_task; | ||
82 | bool init; | ||
83 | }; | ||
84 | |||
85 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
86 | static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts) | ||
87 | { | ||
88 | return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); | ||
89 | } | ||
90 | |||
91 | static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) | ||
92 | { | ||
93 | return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); | ||
94 | } | ||
95 | |||
96 | |||
97 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) | ||
98 | { | ||
99 | return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); | ||
100 | } | ||
101 | |||
102 | static inline int gk20a_fecs_trace_num_ts(void) | ||
103 | { | ||
104 | return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() | ||
105 | - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); | ||
106 | } | ||
107 | |||
108 | static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( | ||
109 | struct gk20a_fecs_trace *trace, int idx) | ||
110 | { | ||
111 | return (struct gk20a_fecs_trace_record *) | ||
112 | ((u8 *) trace->trace_buf.cpu_va | ||
113 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); | ||
114 | } | ||
115 | |||
116 | static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) | ||
117 | { | ||
118 | /* | ||
119 | * testing magic_hi should suffice. magic_lo is sometimes used | ||
120 | * as a sequence number in experimental ucode. | ||
121 | */ | ||
122 | return (r->magic_hi | ||
123 | == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); | ||
124 | } | ||
125 | |||
/* Read the ring read index from FECS mailbox 1, under ELPG protection. */
static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	int idx;

	idx = gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox1_r()));

	return idx;
}
131 | |||
/* Read the ring write index from FECS mailbox 0, under ELPG protection. */
static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	int idx;

	idx = gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox0_r()));

	return idx;
}
137 | |||
138 | static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index) | ||
139 | { | ||
140 | gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index); | ||
141 | return gr_gk20a_elpg_protected_call(g, | ||
142 | (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0)); | ||
143 | } | ||
144 | |||
145 | void gk20a_fecs_trace_hash_dump(struct gk20a *g) | ||
146 | { | ||
147 | u32 bkt; | ||
148 | struct gk20a_fecs_trace_hash_ent *ent; | ||
149 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
150 | |||
151 | gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table"); | ||
152 | |||
153 | nvgpu_mutex_acquire(&trace->hash_lock); | ||
154 | hash_for_each(trace->pid_hash_table, bkt, ent, node) | ||
155 | { | ||
156 | gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d", | ||
157 | ent, bkt, ent->context_ptr, ent->pid); | ||
158 | |||
159 | } | ||
160 | nvgpu_mutex_release(&trace->hash_lock); | ||
161 | } | ||
162 | |||
163 | static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid) | ||
164 | { | ||
165 | struct gk20a_fecs_trace_hash_ent *he; | ||
166 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
167 | |||
168 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
169 | "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid); | ||
170 | |||
171 | he = nvgpu_kzalloc(g, sizeof(*he)); | ||
172 | if (unlikely(!he)) { | ||
173 | nvgpu_warn(g, | ||
174 | "can't alloc new hash entry for context_ptr=%x pid=%d", | ||
175 | context_ptr, pid); | ||
176 | return -ENOMEM; | ||
177 | } | ||
178 | |||
179 | he->context_ptr = context_ptr; | ||
180 | he->pid = pid; | ||
181 | nvgpu_mutex_acquire(&trace->hash_lock); | ||
182 | hash_add(trace->pid_hash_table, &he->node, context_ptr); | ||
183 | nvgpu_mutex_release(&trace->hash_lock); | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr) | ||
188 | { | ||
189 | struct hlist_node *tmp; | ||
190 | struct gk20a_fecs_trace_hash_ent *ent; | ||
191 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
192 | |||
193 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
194 | "freeing hash entry context_ptr=%x", context_ptr); | ||
195 | |||
196 | nvgpu_mutex_acquire(&trace->hash_lock); | ||
197 | hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node, | ||
198 | context_ptr) { | ||
199 | if (ent->context_ptr == context_ptr) { | ||
200 | hash_del(&ent->node); | ||
201 | gk20a_dbg(gpu_dbg_ctxsw, | ||
202 | "freed hash entry=%p context_ptr=%x", ent, | ||
203 | ent->context_ptr); | ||
204 | nvgpu_kfree(g, ent); | ||
205 | break; | ||
206 | } | ||
207 | } | ||
208 | nvgpu_mutex_release(&trace->hash_lock); | ||
209 | } | ||
210 | |||
211 | static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) | ||
212 | { | ||
213 | u32 bkt; | ||
214 | struct hlist_node *tmp; | ||
215 | struct gk20a_fecs_trace_hash_ent *ent; | ||
216 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
217 | |||
218 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace); | ||
219 | |||
220 | nvgpu_mutex_acquire(&trace->hash_lock); | ||
221 | hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) { | ||
222 | hash_del(&ent->node); | ||
223 | nvgpu_kfree(g, ent); | ||
224 | } | ||
225 | nvgpu_mutex_release(&trace->hash_lock); | ||
226 | |||
227 | } | ||
228 | |||
229 | static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr) | ||
230 | { | ||
231 | struct gk20a_fecs_trace_hash_ent *ent; | ||
232 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
233 | pid_t pid = 0; | ||
234 | |||
235 | nvgpu_mutex_acquire(&trace->hash_lock); | ||
236 | hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) { | ||
237 | if (ent->context_ptr == context_ptr) { | ||
238 | gk20a_dbg(gpu_dbg_ctxsw, | ||
239 | "found context_ptr=%x -> pid=%d", | ||
240 | ent->context_ptr, ent->pid); | ||
241 | pid = ent->pid; | ||
242 | break; | ||
243 | } | ||
244 | } | ||
245 | nvgpu_mutex_release(&trace->hash_lock); | ||
246 | |||
247 | return pid; | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Converts HW entry format to userspace-facing format and pushes it to the | ||
252 | * queue. | ||
253 | */ | ||
254 | static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) | ||
255 | { | ||
256 | int i; | ||
257 | struct nvgpu_ctxsw_trace_entry entry = { }; | ||
258 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
259 | pid_t cur_pid; | ||
260 | pid_t new_pid; | ||
261 | |||
262 | /* for now, only one VM */ | ||
263 | const int vmid = 0; | ||
264 | |||
265 | struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record( | ||
266 | trace, index); | ||
267 | |||
268 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
269 | "consuming record trace=%p read=%d record=%p", trace, index, r); | ||
270 | |||
271 | if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) { | ||
272 | nvgpu_warn(g, | ||
273 | "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)", | ||
274 | trace, index, r, r->magic_lo, r->magic_hi); | ||
275 | return -EINVAL; | ||
276 | } | ||
277 | |||
278 | /* Clear magic_hi to detect cases where CPU could read write index | ||
279 | * before FECS record is actually written to DRAM. This should not | ||
280 | * as we force FECS writes to SYSMEM by reading through PRAMIN. | ||
281 | */ | ||
282 | r->magic_hi = 0; | ||
283 | |||
284 | cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr); | ||
285 | new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr); | ||
286 | |||
287 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
288 | "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)", | ||
289 | r->context_ptr, cur_pid, r->new_context_ptr, new_pid); | ||
290 | |||
291 | entry.context_id = r->context_id; | ||
292 | entry.vmid = vmid; | ||
293 | |||
294 | /* break out FECS record into trace events */ | ||
295 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
296 | |||
297 | entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
298 | entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
299 | entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
300 | |||
301 | gk20a_dbg(gpu_dbg_ctxsw, | ||
302 | "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x", | ||
303 | entry.tag, entry.timestamp, r->context_id, | ||
304 | r->new_context_id); | ||
305 | |||
306 | switch (entry.tag) { | ||
307 | case NVGPU_CTXSW_TAG_RESTORE_START: | ||
308 | case NVGPU_CTXSW_TAG_CONTEXT_START: | ||
309 | entry.context_id = r->new_context_id; | ||
310 | entry.pid = new_pid; | ||
311 | break; | ||
312 | |||
313 | case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: | ||
314 | case NVGPU_CTXSW_TAG_FE_ACK: | ||
315 | case NVGPU_CTXSW_TAG_FE_ACK_WFI: | ||
316 | case NVGPU_CTXSW_TAG_FE_ACK_GFXP: | ||
317 | case NVGPU_CTXSW_TAG_FE_ACK_CTAP: | ||
318 | case NVGPU_CTXSW_TAG_FE_ACK_CILP: | ||
319 | case NVGPU_CTXSW_TAG_SAVE_END: | ||
320 | entry.context_id = r->context_id; | ||
321 | entry.pid = cur_pid; | ||
322 | break; | ||
323 | |||
324 | default: | ||
325 | /* tags are not guaranteed to start at the beginning */ | ||
326 | WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP)); | ||
327 | continue; | ||
328 | } | ||
329 | |||
330 | gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld", | ||
331 | entry.tag, entry.context_id, entry.pid); | ||
332 | |||
333 | if (!entry.context_id) | ||
334 | continue; | ||
335 | |||
336 | gk20a_ctxsw_trace_write(g, &entry); | ||
337 | } | ||
338 | |||
339 | gk20a_ctxsw_trace_wake_up(g, vmid); | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | int gk20a_fecs_trace_poll(struct gk20a *g) | ||
344 | { | ||
345 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
346 | |||
347 | int read = 0; | ||
348 | int write = 0; | ||
349 | int cnt; | ||
350 | int err; | ||
351 | |||
352 | err = gk20a_busy(g); | ||
353 | if (unlikely(err)) | ||
354 | return err; | ||
355 | |||
356 | nvgpu_mutex_acquire(&trace->poll_lock); | ||
357 | write = gk20a_fecs_trace_get_write_index(g); | ||
358 | if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { | ||
359 | nvgpu_err(g, | ||
360 | "failed to acquire write index, write=%d", write); | ||
361 | err = write; | ||
362 | goto done; | ||
363 | } | ||
364 | |||
365 | read = gk20a_fecs_trace_get_read_index(g); | ||
366 | |||
367 | cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); | ||
368 | if (!cnt) | ||
369 | goto done; | ||
370 | |||
371 | gk20a_dbg(gpu_dbg_ctxsw, | ||
372 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", | ||
373 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); | ||
374 | |||
375 | /* Ensure all FECS writes have made it to SYSMEM */ | ||
376 | g->ops.mm.fb_flush(g); | ||
377 | |||
378 | while (read != write) { | ||
379 | /* Ignore error code, as we want to consume all records */ | ||
380 | (void)gk20a_fecs_trace_ring_read(g, read); | ||
381 | |||
382 | /* Get to next record. */ | ||
383 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); | ||
384 | } | ||
385 | |||
386 | /* ensure FECS records has been updated before incrementing read index */ | ||
387 | nvgpu_smp_wmb(); | ||
388 | gk20a_fecs_trace_set_read_index(g, read); | ||
389 | |||
390 | done: | ||
391 | nvgpu_mutex_release(&trace->poll_lock); | ||
392 | gk20a_idle(g); | ||
393 | return err; | ||
394 | } | ||
395 | |||
396 | static int gk20a_fecs_trace_periodic_polling(void *arg) | ||
397 | { | ||
398 | struct gk20a *g = (struct gk20a *)arg; | ||
399 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
400 | |||
401 | pr_info("%s: running\n", __func__); | ||
402 | |||
403 | while (!nvgpu_thread_should_stop(&trace->poll_task)) { | ||
404 | |||
405 | nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US, | ||
406 | GK20A_FECS_TRACE_FRAME_PERIOD_US * 2); | ||
407 | |||
408 | gk20a_fecs_trace_poll(g); | ||
409 | } | ||
410 | |||
411 | return 0; | ||
412 | } | ||
413 | |||
414 | static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) | ||
415 | { | ||
416 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
417 | |||
418 | return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS | ||
419 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(), | ||
420 | &trace->trace_buf); | ||
421 | } | ||
422 | |||
423 | static void gk20a_fecs_trace_free_ring(struct gk20a *g) | ||
424 | { | ||
425 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
426 | |||
427 | nvgpu_dma_free(g, &trace->trace_buf); | ||
428 | } | ||
429 | |||
430 | #ifdef CONFIG_DEBUG_FS | ||
431 | /* | ||
432 | * The sequence iterator functions. We simply use the count of the | ||
433 | * next line as our internal position. | ||
434 | */ | ||
435 | static void *gk20a_fecs_trace_debugfs_ring_seq_start( | ||
436 | struct seq_file *s, loff_t *pos) | ||
437 | { | ||
438 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
439 | return NULL; | ||
440 | |||
441 | return pos; | ||
442 | } | ||
443 | |||
444 | static void *gk20a_fecs_trace_debugfs_ring_seq_next( | ||
445 | struct seq_file *s, void *v, loff_t *pos) | ||
446 | { | ||
447 | ++(*pos); | ||
448 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
449 | return NULL; | ||
450 | return pos; | ||
451 | } | ||
452 | |||
/* Nothing to release: the iterator holds no resources. */
static void gk20a_fecs_trace_debugfs_ring_seq_stop(
		struct seq_file *s, void *v)
{
}
457 | |||
458 | static int gk20a_fecs_trace_debugfs_ring_seq_show( | ||
459 | struct seq_file *s, void *v) | ||
460 | { | ||
461 | loff_t *pos = (loff_t *) v; | ||
462 | struct gk20a *g = *(struct gk20a **)s->private; | ||
463 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
464 | struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); | ||
465 | int i; | ||
466 | const u32 invalid_tag = | ||
467 | ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); | ||
468 | u32 tag; | ||
469 | u64 timestamp; | ||
470 | |||
471 | seq_printf(s, "record #%lld (%p)\n", *pos, r); | ||
472 | seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); | ||
473 | seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); | ||
474 | if (gk20a_fecs_trace_is_valid_record(r)) { | ||
475 | seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); | ||
476 | seq_printf(s, "\tcontext_id=%08x\n", r->context_id); | ||
477 | seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); | ||
478 | seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); | ||
479 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
480 | tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
481 | if (tag == invalid_tag) | ||
482 | continue; | ||
483 | timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
484 | timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
485 | seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); | ||
486 | } | ||
487 | } | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * Tie them all together into a set of seq_operations. | ||
493 | */ | ||
494 | static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { | ||
495 | .start = gk20a_fecs_trace_debugfs_ring_seq_start, | ||
496 | .next = gk20a_fecs_trace_debugfs_ring_seq_next, | ||
497 | .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, | ||
498 | .show = gk20a_fecs_trace_debugfs_ring_seq_show | ||
499 | }; | ||
500 | |||
501 | /* | ||
502 | * Time to set up the file operations for our /proc file. In this case, | ||
503 | * all we need is an open function which sets up the sequence ops. | ||
504 | */ | ||
505 | |||
506 | static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, | ||
507 | struct file *file) | ||
508 | { | ||
509 | struct gk20a **p; | ||
510 | |||
511 | if (!capable(CAP_SYS_ADMIN)) | ||
512 | return -EPERM; | ||
513 | |||
514 | p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, | ||
515 | sizeof(struct gk20a *)); | ||
516 | if (!p) | ||
517 | return -ENOMEM; | ||
518 | |||
519 | *p = (struct gk20a *)inode->i_private; | ||
520 | return 0; | ||
521 | }; | ||
522 | |||
523 | /* | ||
524 | * The file operations structure contains our open function along with | ||
525 | * set of the canned seq_ ops. | ||
526 | */ | ||
527 | static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { | ||
528 | .owner = THIS_MODULE, | ||
529 | .open = gk20a_ctxsw_debugfs_ring_open, | ||
530 | .read = seq_read, | ||
531 | .llseek = seq_lseek, | ||
532 | .release = seq_release_private | ||
533 | }; | ||
534 | |||
535 | static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) | ||
536 | { | ||
537 | *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); | ||
538 | return 0; | ||
539 | } | ||
540 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, | ||
541 | gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); | ||
542 | |||
543 | static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) | ||
544 | { | ||
545 | *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); | ||
546 | return 0; | ||
547 | } | ||
548 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, | ||
549 | gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); | ||
550 | |||
551 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | ||
552 | { | ||
553 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
554 | |||
555 | debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g, | ||
556 | &gk20a_fecs_trace_debugfs_read_fops); | ||
557 | debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g, | ||
558 | &gk20a_fecs_trace_debugfs_write_fops); | ||
559 | debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g, | ||
560 | &gk20a_fecs_trace_debugfs_ring_fops); | ||
561 | } | ||
562 | |||
563 | #else | ||
564 | |||
/* Without CONFIG_DEBUG_FS there are no debugfs files to create. */
static void gk20a_fecs_trace_debugfs_init(struct gk20a *g)
{
}
568 | |||
569 | #endif /* CONFIG_DEBUG_FS */ | ||
570 | |||
571 | int gk20a_fecs_trace_init(struct gk20a *g) | ||
572 | { | ||
573 | struct gk20a_fecs_trace *trace; | ||
574 | int err; | ||
575 | |||
576 | trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace)); | ||
577 | if (!trace) { | ||
578 | nvgpu_warn(g, "failed to allocate fecs_trace"); | ||
579 | return -ENOMEM; | ||
580 | } | ||
581 | g->fecs_trace = trace; | ||
582 | |||
583 | err = nvgpu_mutex_init(&trace->poll_lock); | ||
584 | if (err) | ||
585 | goto clean; | ||
586 | err = nvgpu_mutex_init(&trace->hash_lock); | ||
587 | if (err) | ||
588 | goto clean_poll_lock; | ||
589 | |||
590 | BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); | ||
591 | err = gk20a_fecs_trace_alloc_ring(g); | ||
592 | if (err) { | ||
593 | nvgpu_warn(g, "failed to allocate FECS ring"); | ||
594 | goto clean_hash_lock; | ||
595 | } | ||
596 | |||
597 | hash_init(trace->pid_hash_table); | ||
598 | |||
599 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
600 | |||
601 | gk20a_fecs_trace_debugfs_init(g); | ||
602 | |||
603 | trace->init = true; | ||
604 | |||
605 | return 0; | ||
606 | |||
607 | clean_hash_lock: | ||
608 | nvgpu_mutex_destroy(&trace->hash_lock); | ||
609 | clean_poll_lock: | ||
610 | nvgpu_mutex_destroy(&trace->poll_lock); | ||
611 | clean: | ||
612 | nvgpu_kfree(g, trace); | ||
613 | g->fecs_trace = NULL; | ||
614 | return err; | ||
615 | } | ||
616 | |||
617 | int gk20a_fecs_trace_bind_channel(struct gk20a *g, | ||
618 | struct channel_gk20a *ch) | ||
619 | { | ||
620 | /* | ||
621 | * map our circ_buf to the context space and store the GPU VA | ||
622 | * in the context header. | ||
623 | */ | ||
624 | |||
625 | u32 lo; | ||
626 | u32 hi; | ||
627 | u64 pa; | ||
628 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
629 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
630 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; | ||
631 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); | ||
632 | pid_t pid; | ||
633 | u32 aperture; | ||
634 | |||
635 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
636 | "chid=%d context_ptr=%x inst_block=%llx", | ||
637 | ch->chid, context_ptr, | ||
638 | nvgpu_inst_block_addr(g, &ch->inst_block)); | ||
639 | |||
640 | if (!trace) | ||
641 | return -ENOMEM; | ||
642 | |||
643 | pa = nvgpu_inst_block_addr(g, &trace->trace_buf); | ||
644 | if (!pa) | ||
645 | return -ENOMEM; | ||
646 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, | ||
647 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | ||
648 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | ||
649 | |||
650 | if (nvgpu_mem_begin(g, mem)) | ||
651 | return -ENOMEM; | ||
652 | |||
653 | lo = u64_lo32(pa); | ||
654 | hi = u64_hi32(pa); | ||
655 | |||
656 | gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, | ||
657 | lo, GK20A_FECS_TRACE_NUM_RECORDS); | ||
658 | |||
659 | nvgpu_mem_wr(g, mem, | ||
660 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), | ||
661 | lo); | ||
662 | nvgpu_mem_wr(g, mem, | ||
663 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | ||
664 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | | ||
665 | aperture); | ||
666 | nvgpu_mem_wr(g, mem, | ||
667 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | ||
668 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | ||
669 | GK20A_FECS_TRACE_NUM_RECORDS)); | ||
670 | |||
671 | nvgpu_mem_end(g, mem); | ||
672 | |||
673 | /* pid (process identifier) in user space, corresponds to tgid (thread | ||
674 | * group id) in kernel space. | ||
675 | */ | ||
676 | if (gk20a_is_channel_marked_as_tsg(ch)) | ||
677 | pid = tsg_gk20a_from_ch(ch)->tgid; | ||
678 | else | ||
679 | pid = ch->tgid; | ||
680 | gk20a_fecs_trace_hash_add(g, context_ptr, pid); | ||
681 | |||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) | ||
686 | { | ||
687 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); | ||
688 | |||
689 | if (g->fecs_trace) { | ||
690 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
691 | "ch=%p context_ptr=%x", ch, context_ptr); | ||
692 | |||
693 | if (g->ops.fecs_trace.is_enabled(g)) { | ||
694 | if (g->ops.fecs_trace.flush) | ||
695 | g->ops.fecs_trace.flush(g); | ||
696 | gk20a_fecs_trace_poll(g); | ||
697 | } | ||
698 | gk20a_fecs_trace_hash_del(g, context_ptr); | ||
699 | } | ||
700 | return 0; | ||
701 | } | ||
702 | |||
703 | int gk20a_fecs_trace_reset(struct gk20a *g) | ||
704 | { | ||
705 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
706 | |||
707 | if (!g->ops.fecs_trace.is_enabled(g)) | ||
708 | return 0; | ||
709 | |||
710 | gk20a_fecs_trace_poll(g); | ||
711 | return gk20a_fecs_trace_set_read_index(g, 0); | ||
712 | } | ||
713 | |||
714 | int gk20a_fecs_trace_deinit(struct gk20a *g) | ||
715 | { | ||
716 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
717 | |||
718 | if (!trace->init) | ||
719 | return 0; | ||
720 | |||
721 | nvgpu_thread_stop(&trace->poll_task); | ||
722 | gk20a_fecs_trace_free_ring(g); | ||
723 | gk20a_fecs_trace_free_hash_table(g); | ||
724 | |||
725 | nvgpu_mutex_destroy(&g->fecs_trace->hash_lock); | ||
726 | nvgpu_mutex_destroy(&g->fecs_trace->poll_lock); | ||
727 | |||
728 | nvgpu_kfree(g, g->fecs_trace); | ||
729 | g->fecs_trace = NULL; | ||
730 | return 0; | ||
731 | } | ||
732 | |||
733 | int gk20a_gr_max_entries(struct gk20a *g, | ||
734 | struct nvgpu_ctxsw_trace_filter *filter) | ||
735 | { | ||
736 | int n; | ||
737 | int tag; | ||
738 | |||
739 | /* Compute number of entries per record, with given filter */ | ||
740 | for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++) | ||
741 | n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0); | ||
742 | |||
743 | /* Return max number of entries generated for the whole ring */ | ||
744 | return n * GK20A_FECS_TRACE_NUM_RECORDS; | ||
745 | } | ||
746 | |||
747 | int gk20a_fecs_trace_enable(struct gk20a *g) | ||
748 | { | ||
749 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
750 | int write; | ||
751 | int err = 0; | ||
752 | |||
753 | if (!trace) | ||
754 | return -EINVAL; | ||
755 | |||
756 | if (nvgpu_thread_is_running(&trace->poll_task)) | ||
757 | return 0; | ||
758 | |||
759 | /* drop data in hw buffer */ | ||
760 | if (g->ops.fecs_trace.flush) | ||
761 | g->ops.fecs_trace.flush(g); | ||
762 | write = gk20a_fecs_trace_get_write_index(g); | ||
763 | gk20a_fecs_trace_set_read_index(g, write); | ||
764 | |||
765 | err = nvgpu_thread_create(&trace->poll_task, g, | ||
766 | gk20a_fecs_trace_periodic_polling, __func__); | ||
767 | if (err) { | ||
768 | nvgpu_warn(g, | ||
769 | "failed to create FECS polling task"); | ||
770 | return err; | ||
771 | } | ||
772 | |||
773 | return 0; | ||
774 | } | ||
775 | |||
776 | int gk20a_fecs_trace_disable(struct gk20a *g) | ||
777 | { | ||
778 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
779 | |||
780 | if (nvgpu_thread_is_running(&trace->poll_task)) | ||
781 | nvgpu_thread_stop(&trace->poll_task); | ||
782 | |||
783 | return -EPERM; | ||
784 | } | ||
785 | |||
786 | bool gk20a_fecs_trace_is_enabled(struct gk20a *g) | ||
787 | { | ||
788 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
789 | |||
790 | return (trace && nvgpu_thread_is_running(&trace->poll_task)); | ||
791 | } | ||
792 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||